/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_mips.S"

#include "arch/quick_alloc_entrypoints.S"

    .set noreorder
    .balign 4
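
    /*
     * Note: this file is assembled with ".set noreorder", so the instruction after
     * a branch or jump executes in its delay slot; "jalr $zero, $rX" is used
     * throughout as a plain jump (equivalent to "jr $rX").
     */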

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingExceptionFromCode

#define ARG_SLOT_SIZE   32    // space for a0-a3 plus 4 more words
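// The o32 ABI requires callers to reserve stack space for the four register
// arguments ($a0-$a3); the four extra words leave room for additional outgoing
// arguments while keeping $sp 16-byte aligned.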

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     * Callee-save: $s0-$s8 + $gp + $ra, 11 total + $f20-$f30 (6 doubles) + 1 word for Method*
     * Clobbers $t0 and $sp
     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
     * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVES + ARG_SLOT_SIZE bytes on the stack
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    addiu  $sp, $sp, -112
    .cfi_adjust_cfa_offset 112

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 112)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS) size not as expected."
#endif

    sw     $ra, 108($sp)
    .cfi_rel_offset 31, 108
    sw     $s8, 104($sp)
    .cfi_rel_offset 30, 104
    sw     $gp, 100($sp)
    .cfi_rel_offset 28, 100
    sw     $s7, 96($sp)
    .cfi_rel_offset 23, 96
    sw     $s6, 92($sp)
    .cfi_rel_offset 22, 92
    sw     $s5, 88($sp)
    .cfi_rel_offset 21, 88
    sw     $s4, 84($sp)
    .cfi_rel_offset 20, 84
    sw     $s3, 80($sp)
    .cfi_rel_offset 19, 80
    sw     $s2, 76($sp)
    .cfi_rel_offset 18, 76
    sw     $s1, 72($sp)
    .cfi_rel_offset 17, 72
    sw     $s0, 68($sp)
    .cfi_rel_offset 16, 68
    // 4-byte placeholder for register $zero, serving for alignment
    // of the following double precision floating point registers.

    CHECK_ALIGNMENT $sp, $t1
    sdc1   $f30, 56($sp)
    sdc1   $f28, 48($sp)
    sdc1   $f26, 40($sp)
    sdc1   $f24, 32($sp)
    sdc1   $f22, 24($sp)
    sdc1   $f20, 16($sp)

    # 1 word for holding Method* plus 12 bytes padding to keep contents of SP
    # a multiple of 16.

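    # Load art::Runtime::instance_ (mangled: _ZN3art7Runtime9instance_E) via the GOT,
    # then fetch the pre-created kSaveAllCalleeSaves runtime method from it.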
    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
    lw $t0, 0($t0)
    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET($t0)
    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly). Restoration assumes non-moving GC.
     * Does not include rSUSPEND or rSELF
     * callee-save: $s2-$s8 + $gp + $ra, 9 total + 2 words padding + 1 word to hold Method*
     * Clobbers $t0 and $sp
     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
     * Reserves FRAME_SIZE_SAVE_REFS_ONLY + ARG_SLOT_SIZE bytes on the stack
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    addiu  $sp, $sp, -48
    .cfi_adjust_cfa_offset 48

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 48)
#error "FRAME_SIZE_SAVE_REFS_ONLY(MIPS) size not as expected."
#endif

    sw     $ra, 44($sp)
    .cfi_rel_offset 31, 44
    sw     $s8, 40($sp)
    .cfi_rel_offset 30, 40
    sw     $gp, 36($sp)
    .cfi_rel_offset 28, 36
    sw     $s7, 32($sp)
    .cfi_rel_offset 23, 32
    sw     $s6, 28($sp)
    .cfi_rel_offset 22, 28
    sw     $s5, 24($sp)
    .cfi_rel_offset 21, 24
    sw     $s4, 20($sp)
    .cfi_rel_offset 20, 20
    sw     $s3, 16($sp)
    .cfi_rel_offset 19, 16
    sw     $s2, 12($sp)
    .cfi_rel_offset 18, 12
    # 2 words for alignment and bottom word will hold Method*

    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
    lw $t0, 0($t0)
    lw $t0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET($t0)
    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
    lw     $ra, 44($sp)
    .cfi_restore 31
    lw     $s8, 40($sp)
    .cfi_restore 30
    lw     $gp, 36($sp)
    .cfi_restore 28
    lw     $s7, 32($sp)
    .cfi_restore 23
    lw     $s6, 28($sp)
    .cfi_restore 22
    lw     $s5, 24($sp)
    .cfi_restore 21
    lw     $s4, 20($sp)
    .cfi_restore 20
    lw     $s3, 16($sp)
    .cfi_restore 19
    lw     $s2, 12($sp)
    .cfi_restore 18
    addiu  $sp, $sp, 48
    .cfi_adjust_cfa_offset -48
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
    RESTORE_SAVE_REFS_ONLY_FRAME
    jalr   $zero, $ra
    nop
.endm

    /*
     * Individually usable part of macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
    sw      $s8, 104($sp)
    .cfi_rel_offset 30, 104
    sw      $s7, 96($sp)
    .cfi_rel_offset 23, 96
    sw      $s6, 92($sp)
    .cfi_rel_offset 22, 92
    sw      $s5, 88($sp)
    .cfi_rel_offset 21, 88
    sw      $s4, 84($sp)
    .cfi_rel_offset 20, 84
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
     *              (26 total + 1 word padding + method*)
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY save_s4_thru_s8=1
    addiu   $sp, $sp, -112
    .cfi_adjust_cfa_offset 112

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 112)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
#endif

    sw      $ra, 108($sp)
    .cfi_rel_offset 31, 108
    sw      $gp, 100($sp)
    .cfi_rel_offset 28, 100
    .if \save_s4_thru_s8
      SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
    .endif
    sw      $s3, 80($sp)
    .cfi_rel_offset 19, 80
    sw      $s2, 76($sp)
    .cfi_rel_offset 18, 76
    sw      $t1, 72($sp)
    .cfi_rel_offset 9, 72
    sw      $t0, 68($sp)
    .cfi_rel_offset 8, 68
    sw      $a3, 64($sp)
    .cfi_rel_offset 7, 64
    sw      $a2, 60($sp)
    .cfi_rel_offset 6, 60
    sw      $a1, 56($sp)
    .cfi_rel_offset 5, 56
    CHECK_ALIGNMENT $sp, $t8
    sdc1    $f18, 48($sp)
    sdc1    $f16, 40($sp)
    sdc1    $f14, 32($sp)
    sdc1    $f12, 24($sp)
    sdc1    $f10, 16($sp)
    sdc1    $f8,   8($sp)
    # bottom will hold Method*
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
     * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
     *              (26 total + 1 word padding + method*)
     * Clobbers $t0 and $sp
     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME save_s4_thru_s8_only=0
    .if \save_s4_thru_s8_only
      // It is expected that `SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY /* save_s4_thru_s8 */ 0`
      // has been done prior to `SETUP_SAVE_REFS_AND_ARGS_FRAME /* save_s4_thru_s8_only */ 1`.
      SETUP_SAVE_REFS_AND_ARGS_FRAME_S4_THRU_S8
    .else
      SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    .endif
    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
    lw $t0, 0($t0)
    lw $t0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET($t0)
    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
     * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
     *              (26 total + 1 word padding + method*)
     * Clobbers $sp
     * Uses $a0 as the Method* and stores it at the bottom of the stack.
     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
     * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    sw $a0, 0($sp)                                # Place Method* at bottom of stack.
    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
.endm

    /*
     * Individually usable part of macro RESTORE_SAVE_REFS_AND_ARGS_FRAME.
     */
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP
    lw      $gp, 100($sp)
    .cfi_restore 28
.endm

    /*
     * Individually usable part of macro RESTORE_SAVE_REFS_AND_ARGS_FRAME.
     */
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
    lw      $a1, 56($sp)
    .cfi_restore 5
.endm

.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME restore_s4_thru_s8=1, remove_arg_slots=1
    .if \remove_arg_slots
      addiu $sp, $sp, ARG_SLOT_SIZE                 # Remove argument slots from the stack.
      .cfi_adjust_cfa_offset -ARG_SLOT_SIZE
    .endif
    lw      $ra, 108($sp)
    .cfi_restore 31
    .if \restore_s4_thru_s8
      lw    $s8, 104($sp)
      .cfi_restore 30
    .endif
    RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP
    .if \restore_s4_thru_s8
      lw    $s7, 96($sp)
      .cfi_restore 23
      lw    $s6, 92($sp)
      .cfi_restore 22
      lw    $s5, 88($sp)
      .cfi_restore 21
      lw    $s4, 84($sp)
      .cfi_restore 20
    .endif
    lw      $s3, 80($sp)
    .cfi_restore 19
    lw      $s2, 76($sp)
    .cfi_restore 18
    lw      $t1, 72($sp)
    .cfi_restore 9
    lw      $t0, 68($sp)
    .cfi_restore 8
    lw      $a3, 64($sp)
    .cfi_restore 7
    lw      $a2, 60($sp)
    .cfi_restore 6
    RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1
    CHECK_ALIGNMENT $sp, $t8
    ldc1    $f18, 48($sp)
    ldc1    $f16, 40($sp)
    ldc1    $f14, 32($sp)
    ldc1    $f12, 24($sp)
    ldc1    $f10, 16($sp)
    ldc1    $f8,   8($sp)
    addiu   $sp, $sp, 112                           # Pop frame.
    .cfi_adjust_cfa_offset -112
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
     * when the $sp has already been decremented by FRAME_SIZE_SAVE_EVERYTHING.
     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp, $ra, $f0-$f31;
     *              28 (GPR) + 32 (FPR) + 3 words of padding and 1 word for Method*
     * Clobbers $t0 and $t1.
     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 256)
#error "FRAME_SIZE_SAVE_EVERYTHING(MIPS) size not as expected."
#endif

    sw     $ra, 252($sp)
    .cfi_rel_offset 31, 252
    sw     $fp, 248($sp)
    .cfi_rel_offset 30, 248
    sw     $gp, 244($sp)
    .cfi_rel_offset 28, 244
    sw     $t9, 240($sp)
    .cfi_rel_offset 25, 240
    sw     $t8, 236($sp)
    .cfi_rel_offset 24, 236
    sw     $s7, 232($sp)
    .cfi_rel_offset 23, 232
    sw     $s6, 228($sp)
    .cfi_rel_offset 22, 228
    sw     $s5, 224($sp)
    .cfi_rel_offset 21, 224
    sw     $s4, 220($sp)
    .cfi_rel_offset 20, 220
    sw     $s3, 216($sp)
    .cfi_rel_offset 19, 216
    sw     $s2, 212($sp)
    .cfi_rel_offset 18, 212
    sw     $s1, 208($sp)
    .cfi_rel_offset 17, 208
    sw     $s0, 204($sp)
    .cfi_rel_offset 16, 204
    sw     $t7, 200($sp)
    .cfi_rel_offset 15, 200
    sw     $t6, 196($sp)
    .cfi_rel_offset 14, 196
    sw     $t5, 192($sp)
    .cfi_rel_offset 13, 192
    sw     $t4, 188($sp)
    .cfi_rel_offset 12, 188
    sw     $t3, 184($sp)
    .cfi_rel_offset 11, 184
    sw     $t2, 180($sp)
    .cfi_rel_offset 10, 180
    sw     $t1, 176($sp)
    .cfi_rel_offset 9, 176
    sw     $t0, 172($sp)
    .cfi_rel_offset 8, 172
    sw     $a3, 168($sp)
    .cfi_rel_offset 7, 168
    sw     $a2, 164($sp)
    .cfi_rel_offset 6, 164
    sw     $a1, 160($sp)
    .cfi_rel_offset 5, 160
    sw     $a0, 156($sp)
    .cfi_rel_offset 4, 156
    sw     $v1, 152($sp)
    .cfi_rel_offset 3, 152
    sw     $v0, 148($sp)
    .cfi_rel_offset 2, 148

    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
    bal 1f
    .set push
    .set noat
    sw     $at, 144($sp)
    .cfi_rel_offset 1, 144
    .set pop
1:
    .cpload $ra
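    # $ra now holds the address of label 1; .cpload materializes $gp from it using
    # the standard o32 PIC sequence (lui/addiu with %hi/%lo of _gp_disp, plus $ra).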

    CHECK_ALIGNMENT $sp, $t1
    sdc1   $f30, 136($sp)
    sdc1   $f28, 128($sp)
    sdc1   $f26, 120($sp)
    sdc1   $f24, 112($sp)
    sdc1   $f22, 104($sp)
    sdc1   $f20,  96($sp)
    sdc1   $f18,  88($sp)
    sdc1   $f16,  80($sp)
    sdc1   $f14,  72($sp)
    sdc1   $f12,  64($sp)
    sdc1   $f10,  56($sp)
    sdc1   $f8,   48($sp)
    sdc1   $f6,   40($sp)
    sdc1   $f4,   32($sp)
    sdc1   $f2,   24($sp)
    sdc1   $f0,   16($sp)

    # 3 words padding and 1 word for holding Method*

    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
    lw $t0, 0($t0)
    lw $t0, \runtime_method_offset($t0)
    sw $t0, 0($sp)                                # Place Method* at bottom of stack.
    sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
    addiu  $sp, $sp, -ARG_SLOT_SIZE               # reserve argument slots on the stack
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
     * Callee-save: $at, $v0-$v1, $a0-$a3, $t0-$t7, $s0-$s7, $t8-$t9, $gp, $fp, $ra, $f0-$f31;
     *              28 (GPR) + 32 (FPR) + 3 words of padding and 1 word for Method*
     * Clobbers $t0 and $t1.
     * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
     * Reserves FRAME_SIZE_SAVE_EVERYTHING + ARG_SLOT_SIZE bytes on the stack.
     * This macro sets up $gp; entrypoints using it should start with ENTRY_NO_GP.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    addiu  $sp, $sp, -(FRAME_SIZE_SAVE_EVERYTHING)
    .cfi_adjust_cfa_offset (FRAME_SIZE_SAVE_EVERYTHING)
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1
    addiu  $sp, $sp, ARG_SLOT_SIZE                # remove argument slots on the stack
    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE

    CHECK_ALIGNMENT $sp, $t1
    ldc1   $f30, 136($sp)
    ldc1   $f28, 128($sp)
    ldc1   $f26, 120($sp)
    ldc1   $f24, 112($sp)
    ldc1   $f22, 104($sp)
    ldc1   $f20,  96($sp)
    ldc1   $f18,  88($sp)
    ldc1   $f16,  80($sp)
    ldc1   $f14,  72($sp)
    ldc1   $f12,  64($sp)
    ldc1   $f10,  56($sp)
    ldc1   $f8,   48($sp)
    ldc1   $f6,   40($sp)
    ldc1   $f4,   32($sp)
    ldc1   $f2,   24($sp)
    ldc1   $f0,   16($sp)

    lw     $ra, 252($sp)
    .cfi_restore 31
    lw     $fp, 248($sp)
    .cfi_restore 30
    lw     $gp, 244($sp)
    .cfi_restore 28
    lw     $t9, 240($sp)
    .cfi_restore 25
    lw     $t8, 236($sp)
    .cfi_restore 24
    lw     $s7, 232($sp)
    .cfi_restore 23
    lw     $s6, 228($sp)
    .cfi_restore 22
    lw     $s5, 224($sp)
    .cfi_restore 21
    lw     $s4, 220($sp)
    .cfi_restore 20
    lw     $s3, 216($sp)
    .cfi_restore 19
    lw     $s2, 212($sp)
    .cfi_restore 18
    lw     $s1, 208($sp)
    .cfi_restore 17
    lw     $s0, 204($sp)
    .cfi_restore 16
    lw     $t7, 200($sp)
    .cfi_restore 15
    lw     $t6, 196($sp)
    .cfi_restore 14
    lw     $t5, 192($sp)
    .cfi_restore 13
    lw     $t4, 188($sp)
    .cfi_restore 12
    lw     $t3, 184($sp)
    .cfi_restore 11
    lw     $t2, 180($sp)
    .cfi_restore 10
    lw     $t1, 176($sp)
    .cfi_restore 9
    lw     $t0, 172($sp)
    .cfi_restore 8
    lw     $a3, 168($sp)
    .cfi_restore 7
    lw     $a2, 164($sp)
    .cfi_restore 6
    lw     $a1, 160($sp)
    .cfi_restore 5
    .if \restore_a0
    lw     $a0, 156($sp)
    .cfi_restore 4
    .endif
    lw     $v1, 152($sp)
    .cfi_restore 3
    lw     $v0, 148($sp)
    .cfi_restore 2
    .set push
    .set noat
    lw     $at, 144($sp)
    .cfi_restore 1
    .set pop

    addiu  $sp, $sp, 256            # pop frame
    .cfi_adjust_cfa_offset -256
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     * Requires $gp properly set up.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    la      $t9, artDeliverPendingExceptionFromCode
    jalr    $zero, $t9                   # artDeliverPendingExceptionFromCode(Thread*)
    move    $a0, rSELF                   # pass Thread::Current
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     * Requires $gp properly set up.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME    # save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro RETURN_IF_NO_EXCEPTION
    lw     $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    bnez   $t0, 1f                       # branch to deliver if an exception is pending
    nop
    jalr   $zero, $ra
    nop
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_ZERO
    RESTORE_SAVE_REFS_ONLY_FRAME
    bnez   $v0, 1f                       # branch to deliver exception on non-zero result
    nop
    jalr   $zero, $ra                    # return on success
    nop
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RESTORE_SAVE_REFS_ONLY_FRAME
    beqz   $v0, 1f                       # branch to deliver exception on zero/null result
    nop
    jalr   $zero, $ra                    # return on success
    nop
1:
    DELIVER_PENDING_EXCEPTION
.endm

    /*
     * On stack replacement stub.
     * On entry:
     *   a0 = stack to copy
     *   a1 = size of stack
     *   a2 = pc to call
     *   a3 = JValue* result
     *   [sp + 16] = shorty
     *   [sp + 20] = thread
     */
ENTRY art_quick_osr_stub
    // Save callee general purpose registers, RA and GP.
    addiu  $sp, $sp, -48
    .cfi_adjust_cfa_offset 48
    sw     $ra, 44($sp)
    .cfi_rel_offset 31, 44
    sw     $s8, 40($sp)
    .cfi_rel_offset 30, 40
    sw     $gp, 36($sp)
    .cfi_rel_offset 28, 36
    sw     $s7, 32($sp)
    .cfi_rel_offset 23, 32
    sw     $s6, 28($sp)
    .cfi_rel_offset 22, 28
    sw     $s5, 24($sp)
    .cfi_rel_offset 21, 24
    sw     $s4, 20($sp)
    .cfi_rel_offset 20, 20
    sw     $s3, 16($sp)
    .cfi_rel_offset 19, 16
    sw     $s2, 12($sp)
    .cfi_rel_offset 18, 12
    sw     $s1, 8($sp)
    .cfi_rel_offset 17, 8
    sw     $s0, 4($sp)
    .cfi_rel_offset 16, 4

    move   $s8, $sp                        # Save the stack pointer
    move   $s7, $a1                        # Save size of stack
    move   $s6, $a2                        # Save the pc to call
    lw     rSELF, 48+20($sp)               # Load managed thread pointer into rSELF
    addiu  $t0, $sp, -12                   # Reserve space for stack pointer,
                                           #    JValue* result, and ArtMethod* slot.
    srl    $t0, $t0, 4                     # Align stack pointer to 16 bytes
    sll    $sp, $t0, 4                     # Update stack pointer
    sw     $s8, 4($sp)                     # Save old stack pointer
    sw     $a3, 8($sp)                     # Save JValue* result
    sw     $zero, 0($sp)                   # Store null for ArtMethod* at bottom of frame
    subu   $sp, $a1                        # Reserve space for callee stack
    move   $a2, $a1
    move   $a1, $a0
    move   $a0, $sp
    la     $t9, memcpy
    jalr   $t9                             # memcpy (dest a0, src a1, bytes a2)
    addiu  $sp, $sp, -16                   # make space for argument slots for memcpy
    bal    .Losr_entry                     # Call the method
    addiu  $sp, $sp, 16                    # restore stack after memcpy
    lw     $a2, 8($sp)                     # Restore JValue* result
    lw     $sp, 4($sp)                     # Restore saved stack pointer
    lw     $a0, 48+16($sp)                 # load shorty
    lbu    $a0, 0($a0)                     # load return type
    li     $a1, 'D'                        # put char 'D' into a1
    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'D'
    li     $a1, 'F'                        # put char 'F' into a1
    beq    $a0, $a1, .Losr_fp_result       # Test if result type char == 'F'
    nop
    sw     $v0, 0($a2)
    b      .Losr_exit
    sw     $v1, 4($a2)                     # store v0/v1 into result
.Losr_fp_result:
    CHECK_ALIGNMENT $a2, $t0, 8
    sdc1   $f0, 0($a2)                     # store f0/f1 into result
.Losr_exit:
    lw     $ra, 44($sp)
    .cfi_restore 31
    lw     $s8, 40($sp)
    .cfi_restore 30
    lw     $gp, 36($sp)
    .cfi_restore 28
    lw     $s7, 32($sp)
    .cfi_restore 23
    lw     $s6, 28($sp)
    .cfi_restore 22
    lw     $s5, 24($sp)
    .cfi_restore 21
    lw     $s4, 20($sp)
    .cfi_restore 20
    lw     $s3, 16($sp)
    .cfi_restore 19
    lw     $s2, 12($sp)
    .cfi_restore 18
    lw     $s1, 8($sp)
    .cfi_restore 17
    lw     $s0, 4($sp)
    .cfi_restore 16
    jalr   $zero, $ra
    addiu  $sp, $sp, 48
    .cfi_adjust_cfa_offset -48
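    // Called via "bal .Losr_entry" above: store the stub's return address into the
    // last word of the copied frame (the RA slot compiled code expects), then jump
    // to the OSR entry point; the method returns to the instruction after the bal.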
.Losr_entry:
    addiu  $s7, $s7, -4
    addu   $t0, $s7, $sp
    move   $t9, $s6
    jalr   $zero, $t9
    sw     $ra, 0($t0)                     # Store RA per the compiler ABI
END art_quick_osr_stub

    /*
     * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_.
     * Note that fprs_ is expected to be an address that is a multiple of 8.
     * FIXME: just guessing about the shape of the jmpbuf.  Where will pc be?
     */
ENTRY art_quick_do_long_jump
    CHECK_ALIGNMENT $a1, $t1, 8
    ldc1    $f0,   0*8($a1)
    ldc1    $f2,   1*8($a1)
    ldc1    $f4,   2*8($a1)
    ldc1    $f6,   3*8($a1)
    ldc1    $f8,   4*8($a1)
    ldc1    $f10,  5*8($a1)
    ldc1    $f12,  6*8($a1)
    ldc1    $f14,  7*8($a1)
    ldc1    $f16,  8*8($a1)
    ldc1    $f18,  9*8($a1)
    ldc1    $f20, 10*8($a1)
    ldc1    $f22, 11*8($a1)
    ldc1    $f24, 12*8($a1)
    ldc1    $f26, 13*8($a1)
    ldc1    $f28, 14*8($a1)
    ldc1    $f30, 15*8($a1)

    .set push
    .set nomacro
    .set noat
    lw      $at, 4($a0)
    .set pop
    lw      $v0, 8($a0)
    lw      $v1, 12($a0)
    lw      $a1, 20($a0)
    lw      $a2, 24($a0)
    lw      $a3, 28($a0)
    lw      $t0, 32($a0)
    lw      $t1, 36($a0)
    lw      $t2, 40($a0)
    lw      $t3, 44($a0)
    lw      $t4, 48($a0)
    lw      $t5, 52($a0)
    lw      $t6, 56($a0)
    lw      $t7, 60($a0)
    lw      $s0, 64($a0)
    lw      $s1, 68($a0)
    lw      $s2, 72($a0)
    lw      $s3, 76($a0)
    lw      $s4, 80($a0)
    lw      $s5, 84($a0)
    lw      $s6, 88($a0)
    lw      $s7, 92($a0)
    lw      $t8, 96($a0)
    lw      $t9, 100($a0)
    lw      $gp, 112($a0)
    lw      $sp, 116($a0)
    lw      $fp, 120($a0)
    lw      $ra, 124($a0)
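    # $a0 is loaded last: it is the base pointer for all of the loads above.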
    lw      $a0, 16($a0)
    move    $v0, $zero          # clear result registers v0 and v1
    jalr    $zero, $t9          # do long jump
    move    $v1, $zero          # (in branch delay slot)
END art_quick_do_long_jump

    /*
     * Called by managed code. Saves most registers (forms basis of long jump context) and passes
     * the bottom of the stack; the callee-save Method* is placed at the bottom of the frame and
     * recorded in Thread::Current()->top_quick_frame. On entry $a0 holds Throwable*.
     */
ENTRY art_quick_deliver_exception
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    la   $t9, artDeliverExceptionFromCode
    jalr $zero, $t9                 # artDeliverExceptionFromCode(Throwable*, Thread*)
    move $a1, rSELF                 # pass Thread::Current
END art_quick_deliver_exception

    /*
     * Called by managed code to create and deliver a NullPointerException
     */
    .extern artThrowNullPointerExceptionFromCode
ENTRY_NO_GP art_quick_throw_null_pointer_exception
    // Note that setting up $gp does not rely on $t9 here, so branching here directly is OK,
    // even after clobbering any registers we don't need to preserve, such as $gp or $t0.
    SETUP_SAVE_EVERYTHING_FRAME
    la   $t9, artThrowNullPointerExceptionFromCode
    jalr $zero, $t9                 # artThrowNullPointerExceptionFromCode(Thread*)
    move $a0, rSELF                 # pass Thread::Current
END art_quick_throw_null_pointer_exception


    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern artThrowNullPointerExceptionFromSignal
ENTRY_NO_GP_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, FRAME_SIZE_SAVE_EVERYTHING
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP
    # Retrieve the fault address from the padding where the signal handler stores it.
    lw   $a0, (ARG_SLOT_SIZE + __SIZEOF_POINTER__)($sp)
    la   $t9, artThrowNullPointerExceptionFromSignal
    jalr $zero, $t9                 # artThrowNullPointerExceptionFromSignal(uintptr_t, Thread*)
    move $a1, rSELF                 # pass Thread::Current
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException
     */
    .extern artThrowDivZeroFromCode
ENTRY_NO_GP art_quick_throw_div_zero
    SETUP_SAVE_EVERYTHING_FRAME
    la   $t9, artThrowDivZeroFromCode
    jalr $zero, $t9                 # artThrowDivZeroFromCode(Thread*)
    move $a0, rSELF                 # pass Thread::Current
END art_quick_throw_div_zero

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException
     */
    .extern artThrowArrayBoundsFromCode
ENTRY_NO_GP art_quick_throw_array_bounds
    // Note that setting up $gp does not rely on $t9 here, so branching here directly is OK,
    // even after clobbering any registers we don't need to preserve, such as $gp or $t0.
    SETUP_SAVE_EVERYTHING_FRAME
    la   $t9, artThrowArrayBoundsFromCode
    jalr $zero, $t9                 # artThrowArrayBoundsFromCode(index, limit, Thread*)
    move $a2, rSELF                 # pass Thread::Current
END art_quick_throw_array_bounds

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt().
     */
    .extern artThrowStringBoundsFromCode
ENTRY_NO_GP art_quick_throw_string_bounds
    SETUP_SAVE_EVERYTHING_FRAME
    la   $t9, artThrowStringBoundsFromCode
    jalr $zero, $t9                 # artThrowStringBoundsFromCode(index, limit, Thread*)
    move $a2, rSELF                 # pass Thread::Current
END art_quick_throw_string_bounds

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
    .extern artThrowStackOverflowFromCode
ENTRY art_quick_throw_stack_overflow
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    la   $t9, artThrowStackOverflowFromCode
    jalr $zero, $t9                 # artThrowStackOverflowFromCode(Thread*)
    move $a0, rSELF                 # pass Thread::Current
END art_quick_throw_stack_overflow

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/$a0 with the target Method*, arg0/$a0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/$a1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in $v0/$v1 consisting
     * of the target Method* in $v0 and method->code_ in $v1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving $ra
     * pointing back to the original caller.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name, save_s4_thru_s8_only=0
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME \save_s4_thru_s8_only  # save callee saves in case
                                                          # allocation triggers GC
    move  $a2, rSELF                       # pass Thread::Current
    la    $t9, \cxx_name
    jalr  $t9                              # (method_idx, this, Thread*, $sp)
    addiu $a3, $sp, ARG_SLOT_SIZE          # pass $sp (remove arg slots)
    move  $a0, $v0                         # save target Method*
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    beqz  $v0, 1f
    move  $t9, $v1                         # save $v0->code_
    jalr  $zero, $t9
    nop
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

// Each of the following macros expands into four instructions (16 bytes).
// They are used to build indexable "tables" of code.
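// Dispatch jumps into a table with an index pre-scaled by 16, e.g.:
//     addu  $ra, <table base>, <index>   # index is a multiple of 16
//     jalr  $zero, $ra                   # enter the index/16-th 16-byte entry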

.macro LOAD_WORD_TO_REG reg, next_arg, index_reg, label
    lw    $\reg, -4($\next_arg)   # next_arg points to argument after the current one (offset is 4)
    b     \label
    addiu $\index_reg, 16
    .balign 16
.endm

.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index_reg, next_index, label
    lw    $\reg1, -8($\next_arg)  # next_arg points to argument after the current one (offset is 8)
    lw    $\reg2, -4($\next_arg)
    b     \label
    li    $\index_reg, \next_index
    .balign 16
.endm

.macro LOAD_FLOAT_TO_REG reg, next_arg, index_reg, label
    lwc1  $\reg, -4($\next_arg)   # next_arg points to argument after the current one (offset is 4)
    b     \label
    addiu $\index_reg, 16
    .balign 16
.endm

#if defined(__mips_isa_rev) && __mips_isa_rev > 2
// LDu expands into 3 instructions for 64-bit FPU, so index_reg cannot be updated here.
.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label
    .set reorder                                # force use of the branch delay slot
    LDu  $\reg1, $\reg2, -8, $\next_arg, $\tmp  # next_arg points to argument after the current one
                                                # (offset is 8)
    b     \label
    .set noreorder
    .balign 16
.endm
#else
// LDu expands into 2 instructions for 32-bit FPU, so index_reg is updated here.
.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label
    LDu  $\reg1, $\reg2, -8, $\next_arg, $\tmp  # next_arg points to argument after the current one
                                                # (offset is 8)
    b     \label
    addiu $\index_reg, 16
    .balign 16
.endm
#endif

.macro LOAD_END index_reg, next_index, label
    b     \label
    li    $\index_reg, \next_index
    .balign 16
.endm

#define SPILL_SIZE    32

    /*
     * Invocation stub for quick code.
     * On entry:
     *   a0 = method pointer
     *   a1 = argument array or null for no argument methods
     *   a2 = size of argument array in bytes
     *   a3 = (managed) thread pointer
     *   [sp + 16] = JValue* result
     *   [sp + 20] = shorty
     */
ENTRY art_quick_invoke_stub
    sw    $a0, 0($sp)           # save out a0
    addiu $sp, $sp, -SPILL_SIZE # spill s0, s1, fp, ra and gp
    .cfi_adjust_cfa_offset SPILL_SIZE
    sw    $gp, 16($sp)
    sw    $ra, 12($sp)
    .cfi_rel_offset 31, 12
    sw    $fp, 8($sp)
    .cfi_rel_offset 30, 8
    sw    $s1, 4($sp)
    .cfi_rel_offset 17, 4
    sw    $s0, 0($sp)
    .cfi_rel_offset 16, 0
    move  $fp, $sp              # save sp in fp
    .cfi_def_cfa_register 30
    move  $s1, $a3              # move managed thread pointer into s1
    addiu $t0, $a2, 4           # create space for ArtMethod* in frame.
    subu  $t0, $sp, $t0         # reserve & align *stack* to 16 bytes:
    srl   $t0, $t0, 4           #   native calling convention only aligns to 8B,
    sll   $sp, $t0, 4           #   so we have to ensure ART 16B alignment ourselves.
    addiu $a0, $sp, 4           # pass stack pointer + ArtMethod* as dest for memcpy
    la    $t9, memcpy
    jalr  $t9                   # (dest, src, bytes)
    addiu $sp, $sp, -16         # make space for argument slots for memcpy
    addiu $sp, $sp, 16          # restore stack after memcpy
    lw    $gp, 16($fp)          # restore $gp
    lw    $a0, SPILL_SIZE($fp)  # restore ArtMethod*
    lw    $a1, 4($sp)           # a1 = this*
    addiu $t8, $sp, 8           # t8 = pointer to the current argument (skip ArtMethod* and this*)
    li    $t6, 0                # t6 = gpr_index = 0 (corresponds to A2; A0 and A1 are skipped)
    li    $t7, 0                # t7 = fp_index = 0
    lw    $t9, 20 + SPILL_SIZE($fp)  # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
                                # as the $fp is SPILL_SIZE bytes below the $sp on entry)
    addiu $t9, 1                # t9 = shorty + 1 (skip 1 for return type)

    // Load the base addresses of tabInt ... tabDouble.
    // We will use the register indices (gpr_index, fp_index) to branch.
    // Note that the indices are scaled by 16, so they can be added to the bases directly.
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
    lapc  $t2, tabInt
    lapc  $t3, tabLong
    lapc  $t4, tabSingle
    lapc  $t5, tabDouble
#else
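    // "bltzal $zero" (aka "nal") never branches but still sets $ra = PC + 8, which
    // is exactly the address of tabBase, providing a PC-relative base pre-R6.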
    bltzal $zero, tabBase       # nal
    addiu $t2, $ra, %lo(tabInt - tabBase)
tabBase:
    addiu $t3, $ra, %lo(tabLong - tabBase)
    addiu $t4, $ra, %lo(tabSingle - tabBase)
    addiu $t5, $ra, %lo(tabDouble - tabBase)
#endif

loop:
    lbu   $ra, 0($t9)           # ra = shorty[i]
    beqz  $ra, loopEnd          # finish getting args when shorty[i] == '\0'
    addiu $t9, 1

    addiu $ra, -'J'
    beqz  $ra, isLong           # branch if arg type char == 'J'
    addiu $ra, 'J' - 'D'
    beqz  $ra, isDouble         # branch if arg type char == 'D'
    addiu $ra, 'D' - 'F'
    beqz  $ra, isSingle         # branch if arg type char == 'F'

    addu  $ra, $t2, $t6
    jalr  $zero, $ra
    addiu $t8, 4                # next_arg = curr_arg + 4

isLong:
    addu  $ra, $t3, $t6
    jalr  $zero, $ra
    addiu $t8, 8                # next_arg = curr_arg + 8

isSingle:
    addu  $ra, $t4, $t7
    jalr  $zero, $ra
    addiu $t8, 4                # next_arg = curr_arg + 4

isDouble:
    addu  $ra, $t5, $t7
#if defined(__mips_isa_rev) && __mips_isa_rev > 2
    addiu $t7, 16               # fp_index += 16 (the update didn't fit in LOAD_DOUBLE_TO_REG)
#endif
    jalr  $zero, $ra
    addiu $t8, 8                # next_arg = curr_arg + 8

loopEnd:
    lw    $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)  # get pointer to the code
    jalr  $t9                   # call the method
    sw    $zero, 0($sp)         # store null for ArtMethod* at bottom of frame
    move  $sp, $fp              # restore the stack
    lw    $s0, 0($sp)
    .cfi_restore 16
    lw    $s1, 4($sp)
    .cfi_restore 17
    lw    $fp, 8($sp)
    .cfi_restore 30
    lw    $ra, 12($sp)
    .cfi_restore 31
    addiu $sp, $sp, SPILL_SIZE
    .cfi_adjust_cfa_offset -SPILL_SIZE
    lw    $t0, 16($sp)          # get result pointer
    lw    $t1, 20($sp)          # get shorty
    lb    $t1, 0($t1)           # get result type char
    li    $t2, 'D'              # put char 'D' into t2
    beq   $t1, $t2, 5f          # branch if result type char == 'D'
    li    $t3, 'F'              # put char 'F' into t3
    beq   $t1, $t3, 5f          # branch if result type char == 'F'
    sw    $v0, 0($t0)           # store the result
    jalr  $zero, $ra
    sw    $v1, 4($t0)           # store the other half of the result
5:
    CHECK_ALIGNMENT $t0, $t1, 8
    sdc1  $f0, 0($t0)           # store floating point result
    jalr  $zero, $ra
    nop

    // Note that gpr_index is kept within the range of tabInt and tabLong
    // and fp_index is kept within the range of tabSingle and tabDouble.
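    // tabLong repeats entries because a long occupies a register pair: from
    // gpr_index 16 (a3) the pair is bumped to t0/t1, and past t0 no full pair
    // remains, hence the trailing LOAD_ENDs.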
    .balign 16
tabInt:
    LOAD_WORD_TO_REG a2, t8, t6, loop             # a2 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG a3, t8, t6, loop             # a3 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG t0, t8, t6, loop             # t0 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG t1, t8, t6, loop             # t1 = current argument, gpr_index += 16
    LOAD_END t6, 4*16, loop                       # no more GPR args, gpr_index = 4*16
tabLong:
    LOAD_LONG_TO_REG a2, a3, t8, t6, 2*16, loop   # a2_a3 = curr_arg, gpr_index = 2*16
    LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop   # t0_t1 = curr_arg, gpr_index = 4*16
    LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop   # t0_t1 = curr_arg, gpr_index = 4*16
    LOAD_END t6, 4*16, loop                       # no more GPR args, gpr_index = 4*16
    LOAD_END t6, 4*16, loop                       # no more GPR args, gpr_index = 4*16
tabSingle:
    LOAD_FLOAT_TO_REG f8, t8, t7, loop            # f8 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f10, t8, t7, loop           # f10 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f12, t8, t7, loop           # f12 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f14, t8, t7, loop           # f14 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f16, t8, t7, loop           # f16 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f18, t8, t7, loop           # f18 = curr_arg, fp_index += 16
    LOAD_END t7, 6*16, loop                       # no more FPR args, fp_index = 6*16
tabDouble:
    LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loop   # f8_f9 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loop # f10_f11 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loop # f12_f13 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loop # f14_f15 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loop # f16_f17 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loop # f18_f19 = curr_arg; if FPU32, fp_index += 16
    LOAD_END t7, 6*16, loop                       # no more FPR args, fp_index = 6*16
END art_quick_invoke_stub

    /*
     * Invocation static stub for quick code.
     * On entry:
     *   a0 = method pointer
     *   a1 = argument array or null for no argument methods
     *   a2 = size of argument array in bytes
     *   a3 = (managed) thread pointer
     *   [sp + 16] = JValue* result
     *   [sp + 20] = shorty
     */
ENTRY art_quick_invoke_static_stub
    sw    $a0, 0($sp)           # save out a0
    addiu $sp, $sp, -SPILL_SIZE # spill s0, s1, fp, ra and gp
    .cfi_adjust_cfa_offset SPILL_SIZE
    sw    $gp, 16($sp)
    sw    $ra, 12($sp)
    .cfi_rel_offset 31, 12
    sw    $fp, 8($sp)
    .cfi_rel_offset 30, 8
    sw    $s1, 4($sp)
    .cfi_rel_offset 17, 4
    sw    $s0, 0($sp)
    .cfi_rel_offset 16, 0
    move  $fp, $sp              # save sp in fp
    .cfi_def_cfa_register 30
    move  $s1, $a3              # move managed thread pointer into s1
    addiu $t0, $a2, 4           # create space for ArtMethod* in frame.
    subu  $t0, $sp, $t0         # reserve & align *stack* to 16 bytes:
    srl   $t0, $t0, 4           #   native calling convention only aligns to 8B,
    sll   $sp, $t0, 4           #   so we have to ensure ART 16B alignment ourselves.
    addiu $a0, $sp, 4           # pass stack pointer + ArtMethod* as dest for memcpy
    la    $t9, memcpy
    jalr  $t9                   # (dest, src, bytes)
    addiu $sp, $sp, -16         # make space for argument slots for memcpy
    addiu $sp, $sp, 16          # restore stack after memcpy
    lw    $gp, 16($fp)          # restore $gp
    lw    $a0, SPILL_SIZE($fp)  # restore ArtMethod*
    addiu $t8, $sp, 4           # t8 = pointer to the current argument (skip ArtMethod*)
    li    $t6, 0                # t6 = gpr_index = 0 (corresponds to A1; A0 is skipped)
    li    $t7, 0                # t7 = fp_index = 0
    lw    $t9, 20 + SPILL_SIZE($fp)  # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
                                # as the $fp is SPILL_SIZE bytes below the $sp on entry)
    addiu $t9, 1                # t9 = shorty + 1 (skip 1 for return type)

    // Load the base addresses of tabIntS ... tabDoubleS.
    // We will use the register indices (gpr_index, fp_index) to branch.
    // Note that the indices are scaled by 16, so they can be added to the bases directly.
#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
    lapc  $t2, tabIntS
    lapc  $t3, tabLongS
    lapc  $t4, tabSingleS
    lapc  $t5, tabDoubleS
#else
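    // As in art_quick_invoke_stub, "nal" sets $ra = PC + 8 = tabBaseS without branching.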
    bltzal $zero, tabBaseS      # nal
    addiu $t2, $ra, %lo(tabIntS - tabBaseS)
tabBaseS:
    addiu $t3, $ra, %lo(tabLongS - tabBaseS)
    addiu $t4, $ra, %lo(tabSingleS - tabBaseS)
    addiu $t5, $ra, %lo(tabDoubleS - tabBaseS)
#endif

loopS:
    lbu   $ra, 0($t9)           # ra = shorty[i]
    beqz  $ra, loopEndS         # finish getting args when shorty[i] == '\0'
    addiu $t9, 1

    addiu $ra, -'J'
    beqz  $ra, isLongS          # branch if arg type char == 'J'
    addiu $ra, 'J' - 'D'
    beqz  $ra, isDoubleS        # branch if arg type char == 'D'
    addiu $ra, 'D' - 'F'
    beqz  $ra, isSingleS        # branch if arg type char == 'F'

    addu  $ra, $t2, $t6
    jalr  $zero, $ra
    addiu $t8, 4                # next_arg = curr_arg + 4

isLongS:
    addu  $ra, $t3, $t6
    jalr  $zero, $ra
    addiu $t8, 8                # next_arg = curr_arg + 8

isSingleS:
    addu  $ra, $t4, $t7
    jalr  $zero, $ra
    addiu $t8, 4                # next_arg = curr_arg + 4

isDoubleS:
    addu  $ra, $t5, $t7
#if defined(__mips_isa_rev) && __mips_isa_rev > 2
    addiu $t7, 16               # fp_index += 16 (the update didn't fit in LOAD_DOUBLE_TO_REG)
#endif
    jalr  $zero, $ra
    addiu $t8, 8                # next_arg = curr_arg + 8

loopEndS:
    lw    $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)  # get pointer to the code
    jalr  $t9                   # call the method
    sw    $zero, 0($sp)         # store null for ArtMethod* at bottom of frame
    move  $sp, $fp              # restore the stack
    lw    $s0, 0($sp)
    .cfi_restore 16
    lw    $s1, 4($sp)
    .cfi_restore 17
    lw    $fp, 8($sp)
    .cfi_restore 30
    lw    $ra, 12($sp)
    .cfi_restore 31
    addiu $sp, $sp, SPILL_SIZE
    .cfi_adjust_cfa_offset -SPILL_SIZE
    lw    $t0, 16($sp)          # get result pointer
    lw    $t1, 20($sp)          # get shorty
    lb    $t1, 0($t1)           # get result type char
    li    $t2, 'D'              # put char 'D' into t2
    beq   $t1, $t2, 6f          # branch if result type char == 'D'
    li    $t3, 'F'              # put char 'F' into t3
    beq   $t1, $t3, 6f          # branch if result type char == 'F'
    sw    $v0, 0($t0)           # store the result
    jalr  $zero, $ra
    sw    $v1, 4($t0)           # store the other half of the result
6:
    CHECK_ALIGNMENT $t0, $t1, 8
    sdc1  $f0, 0($t0)           # store floating point result
    jalr  $zero, $ra
    nop

    // Note that gpr_index is kept within the range of tabIntS and tabLongS
    // and fp_index is kept within the range of tabSingleS and tabDoubleS.
    .balign 16
tabIntS:
    LOAD_WORD_TO_REG a1, t8, t6, loopS             # a1 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG a2, t8, t6, loopS             # a2 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG a3, t8, t6, loopS             # a3 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG t0, t8, t6, loopS             # t0 = current argument, gpr_index += 16
    LOAD_WORD_TO_REG t1, t8, t6, loopS             # t1 = current argument, gpr_index += 16
    LOAD_END t6, 5*16, loopS                       # no more GPR args, gpr_index = 5*16
tabLongS:
    LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS   # a2_a3 = curr_arg, gpr_index = 3*16
    LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS   # a2_a3 = curr_arg, gpr_index = 3*16
    LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS   # t0_t1 = curr_arg, gpr_index = 5*16
    LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS   # t0_t1 = curr_arg, gpr_index = 5*16
    LOAD_END t6, 5*16, loopS                       # no more GPR args, gpr_index = 5*16
    LOAD_END t6, 5*16, loopS                       # no more GPR args, gpr_index = 5*16
tabSingleS:
    LOAD_FLOAT_TO_REG f8, t8, t7, loopS            # f8 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f10, t8, t7, loopS           # f10 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f12, t8, t7, loopS           # f12 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f14, t8, t7, loopS           # f14 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f16, t8, t7, loopS           # f16 = curr_arg, fp_index += 16
    LOAD_FLOAT_TO_REG f18, t8, t7, loopS           # f18 = curr_arg, fp_index += 16
    LOAD_END t7, 6*16, loopS                       # no more FPR args, fp_index = 6*16
tabDoubleS:
    LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loopS   # f8_f9 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loopS # f10_f11 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loopS # f12_f13 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loopS # f14_f15 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loopS # f16_f17 = curr_arg; if FPU32, fp_index += 16
    LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loopS # f18_f19 = curr_arg; if FPU32, fp_index += 16
    LOAD_END t7, 6*16, loopS                       # no more FPR args, fp_index = 6*16
END art_quick_invoke_static_stub

#undef SPILL_SIZE

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
    .extern artHandleFillArrayDataFromCode
ENTRY art_quick_handle_fill_data
    lw     $a2, 0($sp)                # pass referrer's Method*
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
    la     $t9, artHandleFillArrayDataFromCode
    jalr   $t9                        # (payload offset, Array*, method, Thread*)
    move   $a3, rSELF                 # pass Thread::Current
    RETURN_IF_ZERO
END art_quick_handle_fill_data

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    beqz    $a0, art_quick_throw_null_pointer_exception
    li      $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
    li      $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
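    # Thin-lock fast path. The lock word holds the owner thread id in its low 16 bits,
    # the recursion count above it, and GC state bits near the top ($t3 masks them off).
    # The ll/sc pair forms the CAS: sc writes 1 into $t2 on success, 0 on failure.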
.Lretry_lock:
    lw      $t0, THREAD_ID_OFFSET(rSELF)  # TODO: Can the thread ID really change during the loop?
    ll      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
    and     $t2, $t1, $t3                 # zero the gc bits
    bnez    $t2, .Lnot_unlocked           # already thin locked
    # Unlocked case - $t1: original lock word that's zero except for the read barrier bits.
    or      $t2, $t1, $t0                 # $t2 holds thread id with count of 0 with preserved read barrier bits
    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
    beqz    $t2, .Lretry_lock             # store failed, retry
    nop
    jalr    $zero, $ra
    sync                                  # full (LoadLoad|LoadStore) memory barrier
.Lnot_unlocked:
    # $t1: original lock word, $t0: thread_id with count of 0 and zero read barrier bits
    srl     $t2, $t1, LOCK_WORD_STATE_SHIFT
    bnez    $t2, .Lslow_lock              # if either of the top two bits is set, go slow path
    xor     $t2, $t1, $t0                 # lock_word.ThreadId() ^ self->ThreadId()
    andi    $t2, $t2, 0xFFFF              # zero top 16 bits
    bnez    $t2, .Lslow_lock              # thread ids differ -> contention, go to slow path;
                                          # equal ids mean a recursive lock, handled below
    and     $t2, $t1, $t3                 # zero the gc bits
    addu    $t2, $t2, $t8                 # increment count in lock word
    srl     $t2, $t2, LOCK_WORD_GC_STATE_SHIFT  # if the first gc state bit is set, we overflowed.
    bnez    $t2, .Lslow_lock              # if we overflow the count go slow path
    addu    $t2, $t1, $t8                 # increment count for real
    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
    beqz    $t2, .Lretry_lock             # store failed, retry
    nop
    jalr    $zero, $ra
    nop
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
    la      $t9, artLockObjectFromCode
    jalr    $t9                           # (Object* obj, Thread*)
    move    $a1, rSELF                    # pass Thread::Current
    RETURN_IF_ZERO
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    beqz    $a0, art_quick_throw_null_pointer_exception
    nop
    SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
    la      $t9, artLockObjectFromCode
    jalr    $t9                           # (Object* obj, Thread*)
    move    $a1, rSELF                    # pass Thread::Current
    RETURN_IF_ZERO
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    beqz    $a0, art_quick_throw_null_pointer_exception
    li      $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
    li      $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    lw      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
#else
    ll      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)  # Need to use atomic read-modify-write for read barrier
#endif
    srl     $t2, $t1, LOCK_WORD_STATE_SHIFT
    bnez    $t2, .Lslow_unlock         # if either of the top two bits is set, go slow path
    lw      $t0, THREAD_ID_OFFSET(rSELF)
    and     $t2, $t1, $t3              # zero the gc bits
    xor     $t2, $t2, $t0              # lock_word.ThreadId() ^ self->ThreadId()
    andi    $t2, $t2, 0xFFFF           # zero top 16 bits
    bnez    $t2, .Lslow_unlock         # thread ids differ -> not our lock, go to slow path
    and     $t2, $t1, $t3              # zero the gc bits
    bgeu    $t2, $t8, .Lrecursive_thin_unlock
    # transition to unlocked
    nor     $t2, $zero, $t3            # $t2 = LOCK_WORD_GC_STATE_MASK_SHIFTED
    and     $t2, $t1, $t2              # $t2: zero except for the preserved gc bits
    sync                               # full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    jalr    $zero, $ra
    sw      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
#else
    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
    beqz    $t2, .Lretry_unlock        # store failed, retry
    nop
    jalr    $zero, $ra
    nop
#endif
.Lrecursive_thin_unlock:
    # t1: original lock word
    subu    $t2, $t1, $t8              # decrement count
#ifndef USE_READ_BARRIER
    jalr    $zero, $ra
    sw      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
#else
    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
    beqz    $t2, .Lretry_unlock        # store failed, retry
    nop
    jalr    $zero, $ra
    nop
#endif
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
    la      $t9, artUnlockObjectFromCode
    jalr    $t9                        # (Object* obj, Thread*)
    move    $a1, rSELF                 # pass Thread::Current
    RETURN_IF_ZERO
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    beqz    $a0, art_quick_throw_null_pointer_exception
    nop
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
    la      $t9, artUnlockObjectFromCode
    jalr    $t9                       # (Object* obj, Thread*)
    move    $a1, rSELF                # pass Thread::Current
    RETURN_IF_ZERO
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    beqz   $a1, .Lthrow_class_cast_exception_for_bitstring_check
    nop

    addiu  $sp, $sp, -32
    .cfi_adjust_cfa_offset 32
    sw     $gp, 16($sp)
    sw     $ra, 12($sp)
    .cfi_rel_offset 31, 12
    sw     $t9, 8($sp)
    sw     $a1, 4($sp)
    sw     $a0, 0($sp)
    la     $t9, artInstanceOfFromCode
    jalr   $t9
    addiu  $sp, $sp, -16             # reserve argument slots on the stack
    addiu  $sp, $sp, 16
    lw     $gp, 16($sp)
    beqz   $v0, .Lthrow_class_cast_exception
    lw     $ra, 12($sp)
    jalr   $zero, $ra
    addiu  $sp, $sp, 32
    .cfi_adjust_cfa_offset -32

.Lthrow_class_cast_exception:
    lw     $t9, 8($sp)
    lw     $a1, 4($sp)
    lw     $a0, 0($sp)
    addiu  $sp, $sp, 32
    .cfi_adjust_cfa_offset -32

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    la   $t9, artThrowClassCastExceptionForObject
    jalr $zero, $t9                 # artThrowClassCastExceptionForObject(Object*, Class*, Thread*)
    move $a2, rSELF                 # pass Thread::Current
END art_quick_check_instance_of

    /*
     * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
     * nReg is the register number for rReg.
     */
.macro POP_REG_NE rReg, nReg, offset, rExclude
    .ifnc \rReg, \rExclude
        lw \rReg, \offset($sp)      # restore rReg
        .cfi_restore \nReg
    .endif
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    # saved registers used in art_quick_aput_obj: a0-a2, t0-t1, t9, ra. 8 words for 16B alignment.
    addiu  $sp, $sp, -32
    .cfi_adjust_cfa_offset 32
    sw     $ra, 28($sp)
    .cfi_rel_offset 31, 28
    sw     $t9, 24($sp)
    .cfi_rel_offset 25, 24
    sw     $t1, 20($sp)
    .cfi_rel_offset 9, 20
    sw     $t0, 16($sp)
    .cfi_rel_offset 8, 16
    sw     $a2, 8($sp)              # padding slot at offset 12 (padding can be any slot in the 32B)
    .cfi_rel_offset 6, 8
    sw     $a1, 4($sp)
    .cfi_rel_offset 5, 4
    sw     $a0, 0($sp)
    .cfi_rel_offset 4, 0

    # move $a0, \rRef               # pass ref in a0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, $a1
        move $a1, \rObj             # pass rObj
    .endif
    addiu  $a2, $zero, \offset      # pass offset
    la     $t9, artReadBarrierSlow
    jalr   $t9                      # artReadBarrierSlow(ref, rObj, offset)
    addiu  $sp, $sp, -16            # Use branch delay slot to reserve argument slots on the stack
                                    # before the call to artReadBarrierSlow.
    addiu  $sp, $sp, 16             # restore stack after call to artReadBarrierSlow
    # No need to unpoison return value in v0, artReadBarrierSlow() would do the unpoisoning.
    move \rDest, $v0                # save return value in rDest
                                    # (rDest cannot be v0 in art_quick_aput_obj)

    lw     $a0, 0($sp)              # restore registers except rDest
                                    # (rDest can only be t0 or t1 in art_quick_aput_obj)
    .cfi_restore 4
    lw     $a1, 4($sp)
    .cfi_restore 5
    lw     $a2, 8($sp)
    .cfi_restore 6
    POP_REG_NE $t0, 8, 16, \rDest
    POP_REG_NE $t1, 9, 20, \rDest
    lw     $t9, 24($sp)
    .cfi_restore 25
    lw     $ra, 28($sp)             # restore $ra
    .cfi_restore 31
    addiu  $sp, $sp, 32
    .cfi_adjust_cfa_offset -32
#else
    lw     \rDest, \offset(\rObj)
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm
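
    /*
     * Rough C sketch of the macro above (illustrative only, not assembled):
     *
     *   #ifdef USE_READ_BARRIER
     *     // All caller-save registers used by art_quick_aput_obj are preserved
     *     // around the call; the first (ref) argument is currently unused.
     *     rDest = artReadBarrierSlow(ref, rObj, offset);
     *   #else
     *     rDest = UnpoisonHeapRef(*(uint32_t*)((uint8_t*)rObj + offset));
     *   #endif
     */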

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
ENTRY art_quick_aput_obj
    beqz $a2, .Ldo_aput_null
    nop
    READ_BARRIER $t0, $a0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER $t1, $a2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER $t0, $t0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    bne $t1, $t0, .Lcheck_assignability  # Branch if value's type != array's component type;
    nop                                  # otherwise assignability is trivial, fall through to the store.
.Ldo_aput:
    sll $a1, $a1, 2
    add $t0, $a0, $a1
    POISON_HEAP_REF $a2
    sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
    lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
    srl $t1, $a0, CARD_TABLE_CARD_SHIFT
    add $t1, $t1, $t0
    sb  $t0, ($t1)
    jalr $zero, $ra
    nop
.Ldo_aput_null:
    sll $a1, $a1, 2
    add $t0, $a0, $a1
    sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
    jalr $zero, $ra
    nop
.Lcheck_assignability:
    addiu  $sp, $sp, -32
    .cfi_adjust_cfa_offset 32
    sw     $ra, 28($sp)
    .cfi_rel_offset 31, 28
    sw     $gp, 16($sp)
    sw     $t9, 12($sp)
    sw     $a2, 8($sp)
    sw     $a1, 4($sp)
    sw     $a0, 0($sp)
    move   $a1, $t1
    move   $a0, $t0
    la     $t9, artIsAssignableFromCode
    jalr   $t9               # (Class*, Class*)
    addiu  $sp, $sp, -16     # reserve argument slots on the stack
    addiu  $sp, $sp, 16
    lw     $ra, 28($sp)
    lw     $gp, 16($sp)
    lw     $t9, 12($sp)
    lw     $a2, 8($sp)
    lw     $a1, 4($sp)
    lw     $a0, 0($sp)
    addiu  $sp, $sp, 32
    .cfi_adjust_cfa_offset -32
    bnez   $v0, .Ldo_aput
    nop
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    move $a1, $a2
    la   $t9, artThrowArrayStoreException
    jalr $zero, $t9                 # artThrowArrayStoreException(Class*, Class*, Thread*)
    move $a2, rSELF                 # pass Thread::Current
END art_quick_aput_obj
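
    /*
     * Rough C sketch of the fast path above (illustrative only; ART biases the
     * card-table base so that its low byte equals the dirty-card value):
     *
     *   void aput_obj(Array* arr, int32_t index, Object* value) {
     *     arr->data[index] = value;            // 32-bit reference store
     *     if (value != null) {
     *       uint8_t* cards = self->card_table;
     *       cards[(uintptr_t)arr >> CARD_TABLE_CARD_SHIFT] = (uint8_t)(uintptr_t)cards;
     *     }
     *   }
     */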

// Macros that factor out the code shared by the field get/set downcalls below.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9                       # (field_idx, Thread*)
    move    $a1, rSELF                # pass Thread::Current
    \return                           # RETURN_IF_NO_EXCEPTION or RETURN_IF_ZERO
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9                       # (field_idx, Object*, Thread*) or
                                      # (field_idx, new_val, Thread*)
    move    $a2, rSELF                # pass Thread::Current
    \return                           # RETURN_IF_NO_EXCEPTION or RETURN_IF_ZERO
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9                       # (field_idx, Object*, new_val, Thread*)
    move    $a3, rSELF                # pass Thread::Current
    \return                           # RETURN_IF_NO_EXCEPTION or RETURN_IF_ZERO
END \name
.endm

.macro FOUR_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9                       # (field_idx, Object*, 64-bit new_val, Thread*) or
                                      # (field_idx, 64-bit new_val, Thread*)
                                      # Note that a 64-bit new_val needs to be aligned with
                                      # an even-numbered register, hence A1 may be skipped
                                      # for new_val to reside in A2-A3.
    sw      rSELF, 16($sp)            # pass Thread::Current
    \return                           # RETURN_IF_NO_EXCEPTION or RETURN_IF_ZERO
END \name
.endm
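
    /*
     * Note on the O32 calling convention assumed above: the first four words of
     * arguments travel in $a0-$a3 and any further argument is passed on the
     * stack starting at 16($sp), below the 16 bytes the caller always reserves
     * for $a0-$a3. Hence the downcall above stores its fifth argument, Thread*,
     * to 16($sp) in the delay slot, e.g.:
     *
     *   artSet64InstanceFromCompiledCode(field_idx, obj, val_lo, val_hi, self)
     *   //                               $a0        $a1  $a2     $a3     16($sp)
     */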

    /*
     * Called by managed code to resolve a static/instance field and load/store a value.
     *
     * Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
     * defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
     */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_IF_NO_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_ZERO
FOUR_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_ZERO
FOUR_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_ZERO

// Macro to facilitate adding new allocation entrypoints.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9
    move    $a1, rSELF                # pass Thread::Current
    \return
END \name
.endm

.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9
    move    $a2, rSELF                # pass Thread::Current
    \return
END \name
.endm

.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9
    move    $a3, rSELF                # pass Thread::Current
    \return
END \name
.endm

.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case of GC
    la      $t9, \entrypoint
    jalr    $t9
    sw      rSELF, 16($sp)            # pass Thread::Current
    \return
END \name
.endm

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have MIPS-specific assembly.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// A hand-written override for:
//   GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
//   GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY_NO_GP \c_name
    # Fast path rosalloc allocation
    # a0: type
    # s1: Thread::Current
    # -----------------------------
    # t1: object size
    # t2: rosalloc run
    # t3: thread stack top offset
    # t4: thread stack bottom offset
    # v0: free list head
    #
    # t5, t6 : temps
    lw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)        # Check if thread local allocation
    lw    $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)        # stack has any room left.
    bgeu  $t3, $t4, .Lslow_path_\c_name

    lw    $t1, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET($a0)  # Load object size (t1).
    li    $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE          # Check if size is for a thread local
                                                               # allocation. Also does the
                                                               # initialized and finalizable checks.
    # When isInitialized == 0, then the class is potentially not yet initialized.
    # If the class is not yet initialized, the object size will be very large to force the branch
    # below to be taken.
    #
    # See InitializeClassVisitors in class-inl.h for more details.
    bgtu  $t1, $t5, .Lslow_path_\c_name

    # Compute the rosalloc bracket index from the size. Since the size is already aligned we can
    # combine the two shifts together.
    srl   $t1, $t1, (ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)

    addu  $t2, $t1, $s1
    lw    $t2, (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)($t2)  # Load rosalloc run (t2).

    # Load the free list head (v0).
    # NOTE: this will be the return val.
    lw    $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
    beqz  $v0, .Lslow_path_\c_name
    nop

    # Load the next pointer of the head and update the list head with the next pointer.
    lw    $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)

    # Store the class pointer in the header. This also overwrites the first pointer. The offsets are
    # asserted to match.

#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif

    POISON_HEAP_REF $a0
    sw    $a0, MIRROR_OBJECT_CLASS_OFFSET($v0)

    # Push the new object onto the thread local allocation stack and increment the thread local
    # allocation stack top.
    sw    $v0, 0($t3)
    addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
    sw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)

    # Decrement the size of the free list.
    lw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
    addiu $t5, $t5, -1
    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)

.if \isInitialized == 0
    # This barrier is only necessary when the allocation also requires a class initialization check.
    #
    # If the class is already observably initialized, then new-instance allocations are protected
    # from publishing by the compiler which inserts its own StoreStore barrier.
    sync                                                          # Fence.
.endif
    jalr  $zero, $ra
    nop

  .Lslow_path_\c_name:
    addiu $t9, $t9, (.Lslow_path_\c_name - \c_name) + 4
    .cpload $t9
    SETUP_SAVE_REFS_ONLY_FRAME
    la    $t9, \cxx_name
    jalr  $t9
    move  $a1, $s1                                                # Pass self as argument.
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm
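
    /*
     * Rough C sketch of the RosAlloc fast path above (illustrative only;
     * field names are descriptive, not the real ones):
     *
     *   if (self->alloc_stack_top >= self->alloc_stack_end) goto slow_path;
     *   size_t size = klass->object_size_alloc_fast_path;  // huge if class not initialized
     *   if (size > ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE) goto slow_path;
     *   Run* run = self->rosalloc_runs[(size >> kQuantumShift) - 1];
     *   Slot* slot = run->free_list.head;
     *   if (slot == null) goto slow_path;
     *   run->free_list.head = slot->next;   // pop; the class ptr store below
     *   slot->klass = klass;                // overwrites the next ptr
     *   *self->alloc_stack_top++ = slot;    // push on the thread-local alloc stack
     *   run->free_list.size--;
     *   if (!isInitialized) fence();        // StoreStore before publication
     *   return slot;
     */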

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// a0: type, s1(rSELF): Thread::Current.
// Need to preserve a0 to the slow path.
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
    lw    $v0, THREAD_LOCAL_POS_OFFSET(rSELF)          # Load thread_local_pos.
    lw    $a2, THREAD_LOCAL_END_OFFSET(rSELF)          # Load thread_local_end.
    subu  $a3, $a2, $v0                                # Compute the remaining buffer size.
    lw    $t0, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET($a0)  # Load the object size.

    # When isInitialized == 0, then the class is potentially not yet initialized.
    # If the class is not yet initialized, the object size will be very large to force the branch
    # below to be taken.
    #
    # See InitializeClassVisitors in class-inl.h for more details.
    bgtu  $t0, $a3, \slowPathLabel                     # Check if it fits.
    addu  $t1, $v0, $t0                                # Add object size to tlab pos (in branch
                                                       # delay slot).
    # "Point of no slow path". Won't go to the slow path from here on.
    sw    $t1, THREAD_LOCAL_POS_OFFSET(rSELF)          # Store new thread_local_pos.
    lw    $a2, THREAD_LOCAL_OBJECTS_OFFSET(rSELF)      # Increment thread_local_objects.
    addiu $a2, $a2, 1
    sw    $a2, THREAD_LOCAL_OBJECTS_OFFSET(rSELF)
    POISON_HEAP_REF $a0
    sw    $a0, MIRROR_OBJECT_CLASS_OFFSET($v0)         # Store the class pointer.

.if \isInitialized == 0
    # This barrier is only necessary when the allocation also requires a class initialization check.
    #
    # If the class is already observably initialized, then new-instance allocations are protected
    # from publishing by the compiler which inserts its own StoreStore barrier.
    sync                                               # Fence.
.endif
    jalr  $zero, $ra
    nop
.endm
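
    /*
     * Rough C sketch of the TLAB fast path above (illustrative only):
     *
     *   size_t size = klass->object_size_alloc_fast_path;  // huge if class not initialized
     *   if (size > self->tlab_end - self->tlab_pos) goto slow_path;
     *   Object* obj = (Object*)self->tlab_pos;             // bump-pointer allocation
     *   self->tlab_pos += size;
     *   self->tlab_objects++;
     *   obj->klass = klass;
     *   if (!isInitialized) fence();                       // StoreStore before publication
     *   return obj;
     */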

// The common code for art_quick_alloc_object_resolved/initialized_tlab
// and art_quick_alloc_object_resolved/initialized_region_tlab.
.macro GENERATE_ALLOC_OBJECT_TLAB name, entrypoint, isInitialized
ENTRY_NO_GP \name
    # Fast path tlab allocation.
    # a0: type, s1(rSELF): Thread::Current.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path_\name, \isInitialized
.Lslow_path_\name:
    addiu $t9, $t9, (.Lslow_path_\name - \name) + 4
    .cpload $t9
    SETUP_SAVE_REFS_ONLY_FRAME                         # Save callee saves in case of GC.
    la    $t9, \entrypoint
    jalr  $t9                                          # (mirror::Class*, Thread*)
    move  $a1, rSELF                                   # Pass Thread::Current.
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1

// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
// and art_quick_alloc_array_resolved/initialized_region_tlab.
//
// a0: type, a1: component_count, a2: total_size, s1(rSELF): Thread::Current.
// Need to preserve a0 and a1 to the slow path.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
    li    $a3, OBJECT_ALIGNMENT_MASK_TOGGLED           # Apply alignment mask,
    and   $a2, $a2, $a3                                # i.e. (size + 7) & ~7.

    lw    $v0, THREAD_LOCAL_POS_OFFSET(rSELF)          # Load thread_local_pos.
    lw    $t1, THREAD_LOCAL_END_OFFSET(rSELF)          # Load thread_local_end.
    subu  $t2, $t1, $v0                                # Compute the remaining buffer size.
    bgtu  $a2, $t2, \slowPathLabel                     # Check if it fits.
    addu  $a2, $v0, $a2                                # Add object size to tlab pos (in branch
                                                       # delay slot).

    # "Point of no slow path". Won't go to the slow path from here on.
    sw    $a2, THREAD_LOCAL_POS_OFFSET(rSELF)          # Store new thread_local_pos.
    lw    $a2, THREAD_LOCAL_OBJECTS_OFFSET(rSELF)      # Increment thread_local_objects.
    addiu $a2, $a2, 1
    sw    $a2, THREAD_LOCAL_OBJECTS_OFFSET(rSELF)
    POISON_HEAP_REF $a0
    sw    $a0, MIRROR_OBJECT_CLASS_OFFSET($v0)         # Store the class pointer.
    jalr  $zero, $ra
    sw    $a1, MIRROR_ARRAY_LENGTH_OFFSET($v0)         # Store the array length.
.endm

.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY_NO_GP \name
    # Fast path array allocation for TLAB / region TLAB allocation.
    # a0: mirror::Class* type
    # a1: int32_t component_count
    # s1(rSELF): Thread::Current
    \size_setup .Lslow_path_\name
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path_\name
.Lslow_path_\name:
    # a0: mirror::Class* type
    # a1: int32_t component_count
    # a2: Thread* self
    addiu $t9, $t9, (.Lslow_path_\name - \name) + 4
    .cpload $t9
    SETUP_SAVE_REFS_ONLY_FRAME                         # Save callee saves in case of GC.
    la    $t9, \entrypoint
    jalr  $t9
    move  $a2, rSELF                                   # Pass Thread::Current.
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
    break                                              # We should never enter here.
                                                       # Code below is for reference.
                                                       # Possibly a large object, go slow.
                                                       # Also does negative array size check.
    li    $a2, ((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
    bgtu  $a1, $a2, \slow_path
                                                       # Array classes are never finalizable
                                                       # or uninitialized, no need to check.
    lw    $a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET($a0) # Load component type.
    UNPOISON_HEAP_REF $a3
    lw    $a3, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET($a3)
    srl   $a3, $a3, PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT    # Component size shift is in high 16 bits.
    sllv  $a2, $a1, $a3                                # Calculate data size.
                                                       # Add array data offset and alignment.
    addiu $a2, $a2, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    addiu $a3, $a3, 1                                  # Add 4 to the length only if the component
    andi  $a3, $a3, 4                                  # size shift is 3 (for 64 bit alignment).
    addu  $a2, $a2, $a3
.endm

.macro COMPUTE_ARRAY_SIZE_8 slow_path
    # Possibly a large object, go slow.
    # Also does negative array size check.
    li    $a2, (MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
    bgtu  $a1, $a2, \slow_path
    # Add array data offset and alignment (in branch delay slot).
    addiu $a2, $a1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_16 slow_path
    # Possibly a large object, go slow.
    # Also does negative array size check.
    li    $a2, ((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
    bgtu  $a1, $a2, \slow_path
    sll   $a2, $a1, 1
    # Add array data offset and alignment.
    addiu $a2, $a2, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_32 slow_path
    # Possibly a large object, go slow.
    # Also does negative array size check.
    li    $a2, ((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
    bgtu  $a1, $a2, \slow_path
    sll   $a2, $a1, 2
    # Add array data offset and alignment.
    addiu $a2, $a2, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_64 slow_path
    # Possibly a large object, go slow.
    # Also does negative array size check.
    li    $a2, ((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
    bgtu  $a1, $a2, \slow_path
    sll   $a2, $a1, 3
    # Add array data offset and alignment.
    addiu $a2, $a2, (MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
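
    /*
     * Worked example for the size computations above (COMPUTE_ARRAY_SIZE_32):
     * for an int[10], the data size is 10 << 2 = 40 bytes; adding
     * MIRROR_INT_ARRAY_DATA_OFFSET (the header) plus OBJECT_ALIGNMENT_MASK (7)
     * gives the raw size, which ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE
     * then rounds with OBJECT_ALIGNMENT_MASK_TOGGLED, i.e.
     * total = (header + 40 + 7) & ~7. The unsigned bgtu against the scaled
     * MIN_LARGE_OBJECT_THRESHOLD bound also rejects negative counts, which
     * appear as huge unsigned values.
     */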

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings. $a0 is both input and
     * output.
     */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY_NO_GP \name
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  # Save everything in case of GC.
    move    $s2, $gp                  # Preserve $gp across the call for exception delivery.
    la      $t9, \entrypoint
    jalr    $t9                       # (uint32_t index, Thread*)
    move    $a1, rSELF                # Pass Thread::Current (in delay slot).
    beqz    $v0, 1f                   # Success?
    move    $a0, $v0                  # Move result to $a0 (in delay slot).
    RESTORE_SAVE_EVERYTHING_FRAME 0   # Restore everything except $a0.
    jalr    $zero, $ra                # Return on success.
    nop
1:
    move    $gp, $s2
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm
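
    /*
     * Rough sketch of the downcall above (illustrative only):
     *
     *   Object* result = entrypoint(index, self);  // e.g. artResolveTypeFromCode
     *   if (result != null) {
     *     $a0 = result;       // restore everything except $a0, then return
     *   } else {
     *     DeliverPendingException();  // save-everything frame is already set up
     *   }
     */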

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

    /*
     * Entry from managed code to resolve a method handle. On entry, A0 holds the method handle
     * index. On success the MethodHandle is returned; otherwise an exception is raised.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode

    /*
     * Entry from managed code to resolve a method type. On entry, A0 holds the method type index.
     * On success the MethodType is returned; otherwise an exception is raised.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode

    /*
     * Entry from managed code to resolve a string; this stub will allocate a String and deliver an
     * exception on error. On success the String is returned. A0 holds the string index. The fast
     * path check for a hit in the strings cache has already been performed.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

    /*
     * Entry from managed code when static storage is uninitialized; this stub will run the class
     * initializer and deliver an exception on error. On success the static storage base is
     * returned.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode

    /*
     * Entry from managed code when the dex cache misses for a type_idx.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode

    /*
     * Entry from managed code when type_idx needs to be checked for access and the dex cache may also
     * miss.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode

    /*
     * Called by managed code when the value in rSUSPEND has been decremented to 0.
     */
    .extern artTestSuspendFromCode
ENTRY_NO_GP art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
                                                     # save everything for stack crawl
    la     $t9, artTestSuspendFromCode
    jalr   $t9                                       # (Thread*)
    move   $a0, rSELF
    RESTORE_SAVE_EVERYTHING_FRAME
    jalr   $zero, $ra
    nop
END art_quick_test_suspend

    /*
     * Called by managed code that is attempting to call a method on a proxy class. On entry
     * a0 holds the proxy method; a1, a2 and a3 may contain arguments.
     */
    .extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
    move    $a2, rSELF                  # pass Thread::Current
    la      $t9, artQuickProxyInvokeHandler
    jalr    $t9                         # (Method* proxy method, receiver, Thread*, SP)
    addiu   $a3, $sp, ARG_SLOT_SIZE     # pass $sp (remove arg slots)
    lw      $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    bnez    $t7, 1f
    # We don't care if $v0 and/or $v1 are modified when the exception branch is taken.
    MTD     $v0, $v1, $f0, $f1          # move float value to return value
    jalr    $zero, $ra
    nop
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * a0 is the conflict ArtMethod.
     * t7 is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to v0-v1, a0, t2-t9, f0-f7.
     */
    .extern artLookupResolvedMethod
    .extern __atomic_load_8         # For int64_t std::atomic::load(std::memory_order).
ENTRY art_quick_imt_conflict_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY /* save_s4_thru_s8 */ 0

    lw      $t8, FRAME_SIZE_SAVE_REFS_AND_ARGS($sp)  # $t8 = referrer.
    // If the method is obsolete, just go through the dex cache miss slow path.
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
    lw      $t9, ART_METHOD_ACCESS_FLAGS_OFFSET($t8)  # $t9 = access flags.
    sll     $t9, $t9, 31 - ACC_OBSOLETE_METHOD_SHIFT  # Move obsolete method bit to sign bit.
    bltz    $t9, .Limt_conflict_trampoline_dex_cache_miss
    lw      $t8, ART_METHOD_DECLARING_CLASS_OFFSET($t8)  # $t8 = declaring class (no read barrier).
    lw      $t8, MIRROR_CLASS_DEX_CACHE_OFFSET($t8)  # $t8 = dex cache (without read barrier).
    UNPOISON_HEAP_REF $t8
    la      $t9, __atomic_load_8
    addiu   $sp, $sp, -ARG_SLOT_SIZE                # Reserve argument slots on the stack.
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
    lw      $t8, MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET($t8)  # $t8 = dex cache methods array.

    move    $s2, $t7                                # $s2 = method index (callee-saved).
    lw      $s3, ART_METHOD_JNI_OFFSET_32($a0)      # $s3 = ImtConflictTable (callee-saved).

    sll     $t7, $t7, 32 - METHOD_DEX_CACHE_HASH_BITS  # $t7 = slot index in top bits, zeroes below.
    srl     $t7, $t7, 32 - METHOD_DEX_CACHE_HASH_BITS - (POINTER_SIZE_SHIFT + 1)
                                                    # $t7 = slot offset.

    li      $a1, STD_MEMORY_ORDER_RELAXED           # $a1 = std::memory_order_relaxed.
    jalr    $t9                                     # [$v0, $v1] = __atomic_load_8($a0, $a1).
    addu    $a0, $t8, $t7                           # $a0 = DexCache method slot address.

    bne     $v1, $s2, .Limt_conflict_trampoline_dex_cache_miss  # Branch if method index miss.
    addiu   $sp, $sp, ARG_SLOT_SIZE                 # Remove argument slots from the stack.
    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE

.Limt_table_iterate:
    lw      $t8, 0($s3)                             # Load next entry in ImtConflictTable.
    # Branch if found.
    beq     $t8, $v0, .Limt_table_found
    nop
    # If the entry is null, the interface method is not in the ImtConflictTable.
    beqz    $t8, .Lconflict_trampoline
    nop
    # Iterate over the entries of the ImtConflictTable.
    b       .Limt_table_iterate
    addiu   $s3, $s3, 2 * __SIZEOF_POINTER__        # Iterate to the next entry.

.Limt_table_found:
    # We successfully hit an entry in the table. Load the target method and jump to it.
    .cfi_remember_state
    lw      $a0, __SIZEOF_POINTER__($s3)
    lw      $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME /* restore_s4_thru_s8 */ 0, /* remove_arg_slots */ 0
    jalr    $zero, $t9
    nop
    .cfi_restore_state

.Lconflict_trampoline:
    # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
    .cfi_remember_state
    RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP             # Restore clobbered $gp.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1             # Restore this.
    move    $a0, $v0                                # Load interface method.
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline, /* save_s4_thru_s8_only */ 1
    .cfi_restore_state

.Limt_conflict_trampoline_dex_cache_miss:
    # We're not creating a proper runtime method frame here;
    # artLookupResolvedMethod() is therefore not allowed to walk the stack.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME_GP             # Restore clobbered $gp.
    lw      $a1, FRAME_SIZE_SAVE_REFS_AND_ARGS($sp)  # $a1 = referrer.
    la      $t9, artLookupResolvedMethod
    addiu   $sp, $sp, -ARG_SLOT_SIZE                # Reserve argument slots on the stack.
    .cfi_adjust_cfa_offset ARG_SLOT_SIZE
    jalr    $t9                                     # (uint32_t method_index, ArtMethod* referrer).
    move    $a0, $s2                                # $a0 = method index.

    # If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
    beqz    $v0, .Lconflict_trampoline
    addiu   $sp, $sp, ARG_SLOT_SIZE                 # Remove argument slots from the stack.
    .cfi_adjust_cfa_offset -ARG_SLOT_SIZE

    b       .Limt_table_iterate
    nop
END art_quick_imt_conflict_trampoline
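
    /*
     * Rough C sketch of the trampoline above (illustrative only; obsolete
     * referrers take the dex-cache-miss path directly):
     *
     *   // Atomic 64-bit load of the (method, index) pair from the hashed slot.
     *   auto [m, idx] = dex_cache->resolved_methods[method_index & kHashMask];
     *   if (idx != method_index) m = artLookupResolvedMethod(method_index, referrer);
     *   if (m != null) {
     *     for (Entry* e = conflict_table; ; e++) {  // entries are 2 pointers wide
     *       if (e->interface_method == m) tail_call(e->implementation);
     *       if (e->interface_method == null) break;  // not in the table
     *     }
     *   }
     *   artInvokeInterfaceTrampoline(m, ...);  // populate the table and invoke
     */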

    .extern artQuickResolutionTrampoline
ENTRY art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    move    $a2, rSELF                    # pass Thread::Current
    la      $t9, artQuickResolutionTrampoline
    jalr    $t9                           # (Method* called, receiver, Thread*, SP)
    addiu   $a3, $sp, ARG_SLOT_SIZE       # pass $sp (remove arg slots)
    beqz    $v0, 1f
    lw      $a0, ARG_SLOT_SIZE($sp)       # load resolved method to $a0
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    move    $t9, $v0               # code pointer must be in $t9 to generate the global pointer
    jalr    $zero, $t9             # tail call to method
    nop
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline

    .extern artQuickGenericJniTrampoline
    .extern artQuickGenericJniEndTrampoline
ENTRY art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0
    move    $s8, $sp               # save $sp to $s8
    move    $s3, $gp               # save $gp to $s3

    # prepare for call to artQuickGenericJniTrampoline(Thread*, SP)
    move    $a0, rSELF                     # pass Thread::Current
    addiu   $a1, $sp, ARG_SLOT_SIZE        # save $sp (remove arg slots)
    la      $t9, artQuickGenericJniTrampoline
    jalr    $t9                            # (Thread*, SP)
    addiu   $sp, $sp, -5120                # reserve space on the stack

    # The C call will have registered the complete save-frame on success.
    # The result of the call is:
    # v0: ptr to native code, 0 on error.
    # v1: ptr to the bottom of the used area of the alloca, can restore stack till here.
    beq     $v0, $zero, 2f         # check entry error
    move    $t9, $v0               # save the code ptr
    move    $sp, $v1               # release part of the alloca

    # Load parameters from stack into registers
    lw      $a0,   0($sp)
    lw      $a1,   4($sp)
    lw      $a2,   8($sp)
    lw      $a3,  12($sp)

    # artQuickGenericJniTrampoline sets bit 0 of the native code address to 1
    # when the first two arguments are both single precision floats. This lets
    # us extract them properly from the stack and load into floating point
    # registers.
    MTD     $a0, $a1, $f12, $f13
    andi    $t0, $t9, 1
    xor     $t9, $t9, $t0
    bnez    $t0, 1f
    mtc1    $a1, $f14
    MTD     $a2, $a3, $f14, $f15

1:
    jalr    $t9                    # native call
    nop
    addiu   $sp, $sp, 16           # remove arg slots

    move    $gp, $s3               # restore $gp from $s3

    # result sign extension is handled in C code
    # prepare for call to artQuickGenericJniEndTrampoline(Thread*, result, result_f)
    move    $a0, rSELF             # pass Thread::Current
    move    $a2, $v0               # pass result
    move    $a3, $v1
    addiu   $sp, $sp, -32          # reserve arg slots
    la      $t9, artQuickGenericJniEndTrampoline
    jalr    $t9
    s.d     $f0, 16($sp)           # pass result_f

    lw      $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
    bne     $t0, $zero, 2f         # check for pending exceptions

    move    $sp, $s8               # tear down the alloca

    # tear down the callee-save frame
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    MTD     $v0, $v1, $f0, $f1     # move float value to return value
    jalr    $zero, $ra
    nop

2:
    lw      $t0, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
    addiu   $sp, $t0, -1  // Remove the GenericJNI tag.
    move    $gp, $s3               # restore $gp from $s3
    # This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END art_quick_generic_jni_trampoline
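
    /*
     * The low-bit tag decoded above amounts to (illustrative only):
     *
     *   uintptr_t code = artQuickGenericJniTrampoline(self, sp);
     *   bool two_floats = code & 1;  // set iff the first two args are floats
     *   code &= ~(uintptr_t)1;
     *   if (two_floats) { $f12 = arg0; $f14 = arg1; }
     *   else            { }          // a0-a3 and f12-f15 filled per O32 rules
     *   ((native_fn)code)(...);
     */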

    .extern artQuickToInterpreterBridge
ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    move    $a1, rSELF                          # pass Thread::Current
    la      $t9, artQuickToInterpreterBridge
    jalr    $t9                                 # (Method* method, Thread*, SP)
    addiu   $a2, $sp, ARG_SLOT_SIZE             # pass $sp (remove arg slots)
    lw      $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    bnez    $t7, 1f
    # We don't care if $v0 and/or $v1 are modified when the exception branch is taken.
    MTD     $v0, $v1, $f0, $f1                  # move float value to return value
    jalr    $zero, $ra
    nop
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge

    .extern artInvokeObsoleteMethod
ENTRY art_invoke_obsolete_method_stub
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    la      $t9, artInvokeObsoleteMethod
    jalr    $t9                                 # (Method* method, Thread* self)
    move    $a1, rSELF                          # pass Thread::Current
END art_invoke_obsolete_method_stub

    /*
     * Routines that intercept method calls and method returns (instrumentation entry and exit).
     */
    .extern artInstrumentationMethodEntryFromCode
    .extern artInstrumentationMethodExitFromCode
ENTRY art_quick_instrumentation_entry
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    sw      $a0, 28($sp)    # save arg0 in free arg slot
    addiu   $a3, $sp, ARG_SLOT_SIZE     # Pass $sp.
    la      $t9, artInstrumentationMethodEntryFromCode
    jalr    $t9             # (Method*, Object*, Thread*, SP)
    move    $a2, rSELF      # pass Thread::Current
    beqz    $v0, .Ldeliver_instrumentation_entry_exception
    move    $t9, $v0        # $t9 holds reference to code
    lw      $a0, 28($sp)    # restore arg0 from free arg slot
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    la      $ra, art_quick_instrumentation_exit
    jalr    $zero, $t9      # call method, returning to art_quick_instrumentation_exit
    nop
.Ldeliver_instrumentation_entry_exception:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_entry

ENTRY_NO_GP art_quick_instrumentation_exit
    move    $ra, $zero      # RA points here, so clobber with 0 for later checks.
    SETUP_SAVE_EVERYTHING_FRAME  # Allocates ARG_SLOT_SIZE bytes at the bottom of the stack.
    move    $s2, $gp             # Preserve $gp across the call for exception delivery.

    addiu   $a3, $sp, ARG_SLOT_SIZE+16  # Pass fpr_res pointer ($f0 in SAVE_EVERYTHING_FRAME).
    addiu   $a2, $sp, ARG_SLOT_SIZE+148 # Pass gpr_res pointer ($v0 in SAVE_EVERYTHING_FRAME).
    addiu   $a1, $sp, ARG_SLOT_SIZE     # Pass $sp.
    la      $t9, artInstrumentationMethodExitFromCode
    jalr    $t9                         # (Thread*, SP, gpr_res*, fpr_res*)
    move    $a0, rSELF                  # Pass Thread::Current.

    beqz    $v0, .Ldo_deliver_instrumentation_exception
    move    $gp, $s2        # Deliver exception if we got nullptr as function.
    bnez    $v1, .Ldeoptimize

    # Normal return.
    sw      $v0, (ARG_SLOT_SIZE+FRAME_SIZE_SAVE_EVERYTHING-4)($sp)  # Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    jalr    $zero, $ra
    nop
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.Ldeoptimize:
    b       art_quick_deoptimize
    sw      $v1, (ARG_SLOT_SIZE+FRAME_SIZE_SAVE_EVERYTHING-4)($sp)
                            # Fake a call from instrumentation return pc.
END art_quick_instrumentation_exit
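
    /*
     * Rough sketch of the exit path above, assuming the entrypoint returns a
     * two-word (return-pc, deoptimization-pc) result in $v0/$v1, as the code
     * above reads them (illustrative only):
     *
     *   (ret_pc, deopt_pc) = artInstrumentationMethodExitFromCode(self, sp, &gpr_res, &fpr_res);
     *   if (ret_pc == 0) DeliverPendingException();  // frame is ready
     *   if (deopt_pc != 0) { fake_return_pc = deopt_pc; goto art_quick_deoptimize; }
     *   return_to(ret_pc);
     */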

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimize
ENTRY_NO_GP_CUSTOM_CFA art_quick_deoptimize, ARG_SLOT_SIZE+FRAME_SIZE_SAVE_EVERYTHING
    # SETUP_SAVE_EVERYTHING_FRAME has been done by art_quick_instrumentation_exit.
    .cfi_rel_offset 31, ARG_SLOT_SIZE+252
    .cfi_rel_offset 30, ARG_SLOT_SIZE+248
    .cfi_rel_offset 28, ARG_SLOT_SIZE+244
    .cfi_rel_offset 25, ARG_SLOT_SIZE+240
    .cfi_rel_offset 24, ARG_SLOT_SIZE+236
    .cfi_rel_offset 23, ARG_SLOT_SIZE+232
    .cfi_rel_offset 22, ARG_SLOT_SIZE+228
    .cfi_rel_offset 21, ARG_SLOT_SIZE+224
    .cfi_rel_offset 20, ARG_SLOT_SIZE+220
    .cfi_rel_offset 19, ARG_SLOT_SIZE+216
    .cfi_rel_offset 18, ARG_SLOT_SIZE+212
    .cfi_rel_offset 17, ARG_SLOT_SIZE+208
    .cfi_rel_offset 16, ARG_SLOT_SIZE+204
    .cfi_rel_offset 15, ARG_SLOT_SIZE+200
    .cfi_rel_offset 14, ARG_SLOT_SIZE+196
    .cfi_rel_offset 13, ARG_SLOT_SIZE+192
    .cfi_rel_offset 12, ARG_SLOT_SIZE+188
    .cfi_rel_offset 11, ARG_SLOT_SIZE+184
    .cfi_rel_offset 10, ARG_SLOT_SIZE+180
    .cfi_rel_offset 9, ARG_SLOT_SIZE+176
    .cfi_rel_offset 8, ARG_SLOT_SIZE+172
    .cfi_rel_offset 7, ARG_SLOT_SIZE+168
    .cfi_rel_offset 6, ARG_SLOT_SIZE+164
    .cfi_rel_offset 5, ARG_SLOT_SIZE+160
    .cfi_rel_offset 4, ARG_SLOT_SIZE+156
    .cfi_rel_offset 3, ARG_SLOT_SIZE+152
    .cfi_rel_offset 2, ARG_SLOT_SIZE+148
    .cfi_rel_offset 1, ARG_SLOT_SIZE+144

    la      $t9, artDeoptimize
    jalr    $t9             # (Thread*)
    move    $a0, rSELF      # pass Thread::Current
    break
END art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimizeFromCompiledCode
ENTRY_NO_GP art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
    la       $t9, artDeoptimizeFromCompiledCode
    jalr     $t9                            # (DeoptimizationKind, Thread*)
    move     $a1, rSELF                     # pass Thread::Current
END art_quick_deoptimize_from_compiled_code

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   $a0: low word
     *   $a1: high word
     *   $a2: shift count
     */
ENTRY_NO_GP art_quick_shl_long
    /* shl-long vAA, vBB, vCC */
    sll     $v0, $a0, $a2                    #  rlo<- alo << (shift&31)
    not     $v1, $a2                         #  rhi<- 31-shift  (shift is 5b)
    srl     $a0, 1
    srl     $a0, $v1                         #  alo<- alo >> (32-(shift&31))
    sll     $v1, $a1, $a2                    #  rhi<- ahi << (shift&31)
    andi    $a2, 0x20                        #  shift <- shift & 0x20
    beqz    $a2, 1f
    or      $v1, $a0                         #  rhi<- rhi | alo

    move    $v1, $v0                         #  rhi<- rlo (if shift&0x20)
    move    $v0, $zero                       #  rlo<- 0 (if shift&0x20)

1:  jalr    $zero, $ra
    nop
END art_quick_shl_long
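
    /*
     * In C-like pseudocode, the stub above computes (shr/ushr below are
     * symmetric):
     *
     *   s = shift & 31;
     *   lo_out = lo << s;
     *   hi_out = (hi << s) | (lo >> 1 >> (31 - s));  // the two-step shift gives
     *                                                // lo >> (32-s) even when s == 0,
     *                                                // since MIPS shifts use 5 bits
     *   if (shift & 0x20) { hi_out = lo_out; lo_out = 0; }
     */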

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   $a0: low word
     *   $a1: high word
     *   $a2: shift count
     */
ENTRY_NO_GP art_quick_shr_long
    sra     $v1, $a1, $a2                    #  rhi<- ahi >> (shift&31)
    srl     $v0, $a0, $a2                    #  rlo<- alo >> (shift&31)
    sra     $a3, $a1, 31                     #  $a3<- sign(ah)
    not     $a0, $a2                         #  alo<- 31-shift (shift is 5b)
    sll     $a1, 1
    sll     $a1, $a0                         #  ahi<- ahi << (32-(shift&31))
    andi    $a2, 0x20                        #  shift & 0x20
    beqz    $a2, 1f
    or      $v0, $a1                         #  rlo<- rlo | ahi

    move    $v0, $v1                         #  rlo<- rhi (if shift&0x20)
    move    $v1, $a3                         #  rhi<- sign(ahi) (if shift&0x20)

1:  jalr    $zero, $ra
    nop
END art_quick_shr_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   $a0: low word
     *   $a1: high word
     *   $a2: shift count
     */
    /* ushr-long vAA, vBB, vCC */
ENTRY_NO_GP art_quick_ushr_long
    srl     $v1, $a1, $a2                    #  rhi<- ahi >> (shift&31)
    srl     $v0, $a0, $a2                    #  rlo<- alo >> (shift&31)
    not     $a0, $a2                         #  alo<- 31-shift (shift is 5b)
    sll     $a1, 1
    sll     $a1, $a0                         #  ahi<- ahi << (32-(shift&31))
    andi    $a2, 0x20                        #  shift & 0x20
    beqz    $a2, 1f
    or      $v0, $a1                         #  rlo<- rlo | ahi

    move    $v0, $v1                         #  rlo<- rhi (if shift&0x20)
    move    $v1, $zero                       #  rhi<- 0 (if shift&0x20)

1:  jalr    $zero, $ra
    nop
END art_quick_ushr_long

/* java.lang.String.indexOf(int ch, int fromIndex=0) */
ENTRY_NO_GP art_quick_indexof
/* $a0 holds address of "this" */
/* $a1 holds "ch" */
/* $a2 holds "fromIndex" */
#if (STRING_COMPRESSION_FEATURE)
    lw    $a3, MIRROR_STRING_COUNT_OFFSET($a0)    # 'count' field of this
#else
    lw    $t0, MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
#endif
    slt   $t1, $a2, $zero # if fromIndex < 0
#if defined(_MIPS_ARCH_MIPS32R6)
    seleqz $a2, $a2, $t1  #     fromIndex = 0;
#else
    movn   $a2, $zero, $t1 #    fromIndex = 0;
#endif

#if (STRING_COMPRESSION_FEATURE)
    srl   $t0, $a3, 1     # $a3 holds count (with flag) and $t0 holds actual length
#endif
    subu  $t0, $t0, $a2   # this.length() - fromIndex
    blez  $t0, 6f         # if this.length()-fromIndex <= 0
    li    $v0, -1         #     return -1;

#if (STRING_COMPRESSION_FEATURE)
    sll   $a3, $a3, 31    # Extract compression flag.
    beqz  $a3, .Lstring_indexof_compressed
    move  $t2, $a0        # Save a copy in $t2 to later compute result (in branch delay slot).
#endif
    sll   $v0, $a2, 1     # $a0 += $a2 * 2
    addu  $a0, $a0, $v0   #  "  ditto  "
    move  $v0, $a2        # Set i to fromIndex.

1:
    lhu   $t3, MIRROR_STRING_VALUE_OFFSET($a0)    # if this.charAt(i) == ch
    beq   $t3, $a1, 6f                            #     return i;
    addu  $a0, $a0, 2     # i++
    subu  $t0, $t0, 1     # this.length() - i
    bnez  $t0, 1b         # while this.length() - i > 0
    addu  $v0, $v0, 1     # i++

    li    $v0, -1         # if this.length() - i <= 0
                          #     return -1;

6:
    j     $ra
    nop

#if (STRING_COMPRESSION_FEATURE)
.Lstring_indexof_compressed:
    addu  $a0, $a0, $a2   # $a0 += $a2

.Lstring_indexof_compressed_loop:
    lbu   $t3, MIRROR_STRING_VALUE_OFFSET($a0)
    beq   $t3, $a1, .Lstring_indexof_compressed_matched
    subu  $t0, $t0, 1
    bgtz  $t0, .Lstring_indexof_compressed_loop
    addu  $a0, $a0, 1

.Lstring_indexof_nomatch:
    jalr  $zero, $ra
    li    $v0, -1         # return -1;

.Lstring_indexof_compressed_matched:
    jalr  $zero, $ra
    subu  $v0, $a0, $t2   # return (current - start);
#endif
END art_quick_indexof
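
    /*
     * Rough C sketch of the stub above (illustrative only):
     *
     *   int32_t indexOf(String* s, int32_t ch, int32_t fromIndex) {
     *     if (fromIndex < 0) fromIndex = 0;
     *     // 8-bit loads when the compression flag says the string is compressed.
     *     for (int32_t i = fromIndex; i < s->length(); i++) {
     *       if (s->value[i] == ch) return i;
     *     }
     *     return -1;
     *   }
     */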

/* java.lang.String.compareTo(String anotherString) */
ENTRY_NO_GP art_quick_string_compareto
/* $a0 holds address of "this" */
/* $a1 holds address of "anotherString" */
    beq    $a0, $a1, .Lstring_compareto_length_diff   # this and anotherString are the same object
    move   $a3, $a2                                   # trick to return 0 (it returns a2 - a3)

#if (STRING_COMPRESSION_FEATURE)
    lw     $t0, MIRROR_STRING_COUNT_OFFSET($a0)   # 'count' field of this
    lw     $t1, MIRROR_STRING_COUNT_OFFSET($a1)   # 'count' field of anotherString
    sra    $a2, $t0, 1                            # this.length()
    sra    $a3, $t1, 1                            # anotherString.length()
#else
    lw     $a2, MIRROR_STRING_COUNT_OFFSET($a0)   # this.length()
    lw     $a3, MIRROR_STRING_COUNT_OFFSET($a1)   # anotherString.length()
#endif

    MINu   $t2, $a2, $a3
    # $t2 now holds min(this.length(),anotherString.length())

    # while min(this.length(),anotherString.length())-i != 0
    beqz   $t2, .Lstring_compareto_length_diff # if $t2==0
    nop                                        #     return (this.length() - anotherString.length())

#if (STRING_COMPRESSION_FEATURE)
    # Handle the different compression cases:
    sll    $t3, $t0, 31
    beqz   $t3, .Lstring_compareto_this_is_compressed
    sll    $t3, $t1, 31                           # In branch delay slot.
    beqz   $t3, .Lstring_compareto_that_is_compressed
    nop
    b      .Lstring_compareto_both_not_compressed
    nop

.Lstring_compareto_this_is_compressed:
    beqz   $t3, .Lstring_compareto_both_compressed
    nop
    /* If (this->IsCompressed() && that->IsCompressed() == false) */
.Lstring_compareto_loop_comparison_this_compressed:
    lbu    $t0, MIRROR_STRING_VALUE_OFFSET($a0)
    lhu    $t1, MIRROR_STRING_VALUE_OFFSET($a1)
    bne    $t0, $t1, .Lstring_compareto_char_diff
    addiu  $a0, $a0, 1    # point at this.charAt(i++) - compressed
    subu   $t2, $t2, 1    # new value of min(this.length(),anotherString.length())-i
    bnez   $t2, .Lstring_compareto_loop_comparison_this_compressed
    addiu  $a1, $a1, 2    # point at anotherString.charAt(i++) - uncompressed
    jalr   $zero, $ra
    subu   $v0, $a2, $a3  # return (this.length() - anotherString.length())

.Lstring_compareto_that_is_compressed:
    lhu    $t0, MIRROR_STRING_VALUE_OFFSET($a0)
    lbu    $t1, MIRROR_STRING_VALUE_OFFSET($a1)
    bne    $t0, $t1, .Lstring_compareto_char_diff
    addiu  $a0, $a0, 2    # point at this.charAt(i++) - uncompressed
    subu   $t2, $t2, 1    # new value of min(this.length(),anotherString.length())-i
    bnez   $t2, .Lstring_compareto_that_is_compressed
    addiu  $a1, $a1, 1    # point at anotherString.charAt(i++) - compressed
    jalr   $zero, $ra
    subu   $v0, $a2, $a3  # return (this.length() - anotherString.length())

.Lstring_compareto_both_compressed:
    lbu    $t0, MIRROR_STRING_VALUE_OFFSET($a0)
    lbu    $t1, MIRROR_STRING_VALUE_OFFSET($a1)
    bne    $t0, $t1, .Lstring_compareto_char_diff
    addiu  $a0, $a0, 1    # point at this.charAt(i++) - compressed
    subu   $t2, $t2, 1    # new value of min(this.length(),anotherString.length())-i
    bnez   $t2, .Lstring_compareto_both_compressed
    addiu  $a1, $a1, 1    # point at anotherString.charAt(i++) - compressed
    jalr   $zero, $ra
    subu   $v0, $a2, $a3  # return (this.length() - anotherString.length())
#endif

.Lstring_compareto_both_not_compressed:
    lhu    $t0, MIRROR_STRING_VALUE_OFFSET($a0)   # while this.charAt(i) == anotherString.charAt(i)
    lhu    $t1, MIRROR_STRING_VALUE_OFFSET($a1)
    bne    $t0, $t1, .Lstring_compareto_char_diff # if this.charAt(i) != anotherString.charAt(i)
                          #     return (this.charAt(i) - anotherString.charAt(i))
    addiu  $a0, $a0, 2    # point at this.charAt(i++)
    subu   $t2, $t2, 1    # new value of min(this.length(),anotherString.length())-i
    bnez   $t2, .Lstring_compareto_both_not_compressed
    addiu  $a1, $a1, 2    # point at anotherString.charAt(i++)

.Lstring_compareto_length_diff:
    jalr   $zero, $ra
    subu   $v0, $a2, $a3  # return (this.length() - anotherString.length())

.Lstring_compareto_char_diff:
    jalr   $zero, $ra
    subu   $v0, $t0, $t1  # return (this.charAt(i) - anotherString.charAt(i))
END art_quick_string_compareto
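
    /*
     * Rough C sketch of the stub above (illustrative only):
     *
     *   int32_t compareTo(String* a, String* b) {
     *     if (a == b) return 0;
     *     int32_t al = a->length(), bl = b->length();  // count >> 1 if compressed
     *     for (int32_t i = 0; i < min(al, bl); i++) {  // 8/16-bit loads per the
     *       if (a->value[i] != b->value[i])            // two compression flags
     *         return a->value[i] - b->value[i];
     *     }
     *     return al - bl;
     *   }
     */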

    /*
     * Create a function `name` calling the ReadBarrier::Mark routine,
     * getting its argument and returning its result through register
     * `reg`, saving and restoring all caller-save registers.
     */
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
    // Null check so that we can load the lock word.
    bnez    \reg, .Lnot_null_\name
    nop
.Lret_rb_\name:
    jalr    $zero, $ra
    nop
.Lnot_null_\name:
    // Check lock word for mark bit, if marked return.
    lw      $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg)
    .set push
    .set noat
    sll     $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT     # Move mark bit to sign bit.
    bltz    $at, .Lret_rb_\name
#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
    // The below code depends on the lock word state being in the highest bits
    // and the "forwarding address" state having all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
#endif
    // Test that both the forwarding state bits are 1.
    sll     $at, $t9, 1
    and     $at, $at, $t9                               # Sign bit = 1 IFF both bits are 1.
    bltz    $at, .Lret_forwarding_address\name
    nop
    .set pop

    addiu   $sp, $sp, -160      # Includes 16 bytes of space for argument registers a0-a3.
    .cfi_adjust_cfa_offset 160

    sw      $ra, 156($sp)
    .cfi_rel_offset 31, 156
    sw      $t8, 152($sp)
    .cfi_rel_offset 24, 152
    sw      $t7, 148($sp)
    .cfi_rel_offset 15, 148
    sw      $t6, 144($sp)
    .cfi_rel_offset 14, 144
    sw      $t5, 140($sp)
    .cfi_rel_offset 13, 140
    sw      $t4, 136($sp)
    .cfi_rel_offset 12, 136
    sw      $t3, 132($sp)
    .cfi_rel_offset 11, 132
    sw      $t2, 128($sp)
    .cfi_rel_offset 10, 128
    sw      $t1, 124($sp)
    .cfi_rel_offset 9, 124
    sw      $t0, 120($sp)
    .cfi_rel_offset 8, 120
    sw      $a3, 116($sp)
    .cfi_rel_offset 7, 116
    sw      $a2, 112($sp)
    .cfi_rel_offset 6, 112
    sw      $a1, 108($sp)
    .cfi_rel_offset 5, 108
    sw      $a0, 104($sp)
    .cfi_rel_offset 4, 104
    sw      $v1, 100($sp)
    .cfi_rel_offset 3, 100
    sw      $v0, 96($sp)
    .cfi_rel_offset 2, 96

    la      $t9, artReadBarrierMark

    sdc1    $f18, 88($sp)
    sdc1    $f16, 80($sp)
    sdc1    $f14, 72($sp)
    sdc1    $f12, 64($sp)
    sdc1    $f10, 56($sp)
    sdc1    $f8,  48($sp)
    sdc1    $f6,  40($sp)
    sdc1    $f4,  32($sp)
    sdc1    $f2,  24($sp)

    .ifnc \reg, $a0
      move  $a0, \reg           # pass obj from `reg` in a0
    .endif
    jalr    $t9                 # v0 <- artReadBarrierMark(obj)
    sdc1    $f0,  16($sp)       # in delay slot

    lw      $ra, 156($sp)
    .cfi_restore 31
    lw      $t8, 152($sp)
    .cfi_restore 24
    lw      $t7, 148($sp)
    .cfi_restore 15
    lw      $t6, 144($sp)
    .cfi_restore 14
    lw      $t5, 140($sp)
    .cfi_restore 13
    lw      $t4, 136($sp)
    .cfi_restore 12
    lw      $t3, 132($sp)
    .cfi_restore 11
    lw      $t2, 128($sp)
    .cfi_restore 10
    lw      $t1, 124($sp)
    .cfi_restore 9
    lw      $t0, 120($sp)
    .cfi_restore 8
    lw      $a3, 116($sp)
    .cfi_restore 7
    lw      $a2, 112($sp)
    .cfi_restore 6
    lw      $a1, 108($sp)
    .cfi_restore 5
    lw      $a0, 104($sp)
    .cfi_restore 4
    lw      $v1, 100($sp)
    .cfi_restore 3
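
    // If `reg` is $v0, the marked reference returned in $v0 is already in
    // place, so $v0 is deliberately left unrestored in that case.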

    .ifnc \reg, $v0
      move  \reg, $v0           # `reg` <- v0
      lw    $v0, 96($sp)
      .cfi_restore 2
    .endif

    ldc1    $f18, 88($sp)
    ldc1    $f16, 80($sp)
    ldc1    $f14, 72($sp)
    ldc1    $f12, 64($sp)
    ldc1    $f10, 56($sp)
    ldc1    $f8,  48($sp)
    ldc1    $f6,  40($sp)
    ldc1    $f4,  32($sp)
    ldc1    $f2,  24($sp)
    ldc1    $f0,  16($sp)

    jalr    $zero, $ra
    addiu   $sp, $sp, 160
    .cfi_adjust_cfa_offset -160

.Lret_forwarding_address\name:
    jalr    $zero, $ra
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    sll     \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
END \name
.endm

// Note that art_quick_read_barrier_mark_regXX corresponds to register XX+1.
// ZERO (register 0) is reserved.
// AT (register 1) is reserved as a temporary/scratch register.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, $v0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, $v1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, $a0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, $a1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, $a2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, $a3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, $t0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, $t1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, $t2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, $t3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, $t4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, $t5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, $t6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, $t7
// S0 and S1 (registers 16 and 17) are reserved as the suspend check (rSUSPEND)
// and thread (rSELF) registers.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, $s2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, $s3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, $s4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, $s5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, $s6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7
// T8 and T9 (registers 24 and 25) are reserved as temporary/scratch registers.
// K0, K1, GP, SP (registers 26 - 29) are reserved.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8
// RA (register 31) is reserved.

// Caller code:
// Short constant offset/index:
// R2:                           | R6:
//  lw      $t9, pReadBarrierMarkReg00
//  beqz    $t9, skip_call       |  beqzc   $t9, skip_call
//  addiu   $t9, $t9, thunk_disp |  nop
//  jalr    $t9                  |  jialc   $t9, thunk_disp
//  nop                          |
// skip_call:                    | skip_call:
//  lw      `out`, ofs(`obj`)    |  lw      `out`, ofs(`obj`)
// [subu    `out`, $zero, `out`] | [subu    `out`, $zero, `out`]  # Unpoison reference.
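// (R6: the nop fills beqzc's forbidden slot, which must not contain the
// compact jump jialc.)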
.macro BRB_FIELD_SHORT_OFFSET_ENTRY obj
1:
    # Explicit null check. It may be redundant: for array elements and for
    # field offsets larger than the page size (4KB), compiled code has
    # already performed a null check.
    # $ra will be adjusted to point to lw's stack map when throwing NPE.
    beqz    \obj, .Lintrospection_throw_npe
#if defined(_MIPS_ARCH_MIPS32R6)
    lapc    $gp, .Lintrospection_exits                  # $gp = address of .Lintrospection_exits.
#else
    addiu   $gp, $t9, (.Lintrospection_exits - 1b)      # $gp = address of .Lintrospection_exits.
#endif
    .set push
    .set noat
    lw      $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj)
    sll     $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT   # Move barrier state bit
                                                                # to sign bit.
    bltz    $at, .Lintrospection_field_array            # If gray, load reference, mark.
    move    $t8, \obj                                   # Move `obj` to $t8 for common code.
    .set pop
    jalr    $zero, $ra                                  # Otherwise, load-load barrier and return.
    sync
.endm

// Caller code (R2):
// Long constant offset/index:   | Variable index:
//  lw      $t9, pReadBarrierMarkReg00
//  lui     $t8, ofs_hi          |  sll     $t8, `index`, 2
//  beqz    $t9, skip_call       |  beqz    $t9, skip_call
//  addiu   $t9, $t9, thunk_disp |  addiu   $t9, $t9, thunk_disp
//  jalr    $t9                  |  jalr    $t9
// skip_call:                    | skip_call:
//  addu    $t8, $t8, `obj`      |  addu    $t8, $t8, `obj`
//  lw      `out`, ofs_lo($t8)   |  lw      `out`, ofs($t8)
// [subu    `out`, $zero, `out`] | [subu    `out`, $zero, `out`]  # Unpoison reference.
//
// Caller code (R6):
// Long constant offset/index:   | Variable index:
//  lw      $t9, pReadBarrierMarkReg00
//  beqz    $t9, skip_call       |  beqz    $t9, skip_call
//  aui     $t8, `obj`, ofs_hi   |  lsa     $t8, `index`, `obj`, 2
//  jialc   $t9, thunk_disp      |  jialc   $t9, thunk_disp
// skip_call:                    | skip_call:
//  lw      `out`, ofs_lo($t8)   |  lw      `out`, ofs($t8)
// [subu    `out`, $zero, `out`] | [subu    `out`, $zero, `out`]  # Unpoison reference.
.macro BRB_FIELD_LONG_OFFSET_ENTRY obj
1:
    # No explicit null check for variable indices or large constant indices/offsets
    # as it must have been done earlier.
#if defined(_MIPS_ARCH_MIPS32R6)
    lapc    $gp, .Lintrospection_exits                  # $gp = address of .Lintrospection_exits.
#else
    addiu   $gp, $t9, (.Lintrospection_exits - 1b)      # $gp = address of .Lintrospection_exits.
#endif
    .set push
    .set noat
    lw      $at, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj)
    sll     $at, $at, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT   # Move barrier state bit
                                                                # to sign bit.
    bltz    $at, .Lintrospection_field_array            # If gray, load reference, mark.
    nop
    .set pop
    jalr    $zero, $ra                                  # Otherwise, load-load barrier and return.
    sync
    break                                               # Padding to 8 instructions.
.endm

.macro BRB_GC_ROOT_ENTRY root
1:
#if defined(_MIPS_ARCH_MIPS32R6)
    lapc    $gp, .Lintrospection_exit_\root             # $gp = exit point address.
#else
    addiu   $gp, $t9, (.Lintrospection_exit_\root - 1b)  # $gp = exit point address.
#endif
    bnez    \root, .Lintrospection_common
    move    $t8, \root                                  # Move reference to $t8 for common code.
    jalr    $zero, $ra                                  # Return if null.
    # The next instruction (from the following BRB_GC_ROOT_ENTRY) fills the delay slot.
    # It is harmless there: for the last entry it is an actual NOP; for the
    # other entries it merely sets $gp, which is dead at this point anyway.
.endm

.macro BRB_FIELD_EXIT out
.Lintrospection_exit_\out:
    jalr    $zero, $ra
    move    \out, $t8                                   # Return reference in expected register.
.endm

.macro BRB_FIELD_EXIT_BREAK
    break
    break
.endm
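
// Note: each field/array entry below is exactly 8 instructions (32 bytes) and
// each GC root entry is 4 instructions (16 bytes), so a caller can reach the
// entry for a given destination register at a fixed displacement (thunk_disp
// in the caller sketches above) from the introspection entrypoint. ENTRY_NO_GP
// is used because $gp is repurposed here to hold the exit point address.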

ENTRY_NO_GP art_quick_read_barrier_mark_introspection
    # Entry points for offsets/indices not fitting into int16_t and for variable indices.
    BRB_FIELD_LONG_OFFSET_ENTRY $v0
    BRB_FIELD_LONG_OFFSET_ENTRY $v1
    BRB_FIELD_LONG_OFFSET_ENTRY $a0
    BRB_FIELD_LONG_OFFSET_ENTRY $a1
    BRB_FIELD_LONG_OFFSET_ENTRY $a2
    BRB_FIELD_LONG_OFFSET_ENTRY $a3
    BRB_FIELD_LONG_OFFSET_ENTRY $t0
    BRB_FIELD_LONG_OFFSET_ENTRY $t1
    BRB_FIELD_LONG_OFFSET_ENTRY $t2
    BRB_FIELD_LONG_OFFSET_ENTRY $t3
    BRB_FIELD_LONG_OFFSET_ENTRY $t4
    BRB_FIELD_LONG_OFFSET_ENTRY $t5
    BRB_FIELD_LONG_OFFSET_ENTRY $t6
    BRB_FIELD_LONG_OFFSET_ENTRY $t7
    BRB_FIELD_LONG_OFFSET_ENTRY $s2
    BRB_FIELD_LONG_OFFSET_ENTRY $s3
    BRB_FIELD_LONG_OFFSET_ENTRY $s4
    BRB_FIELD_LONG_OFFSET_ENTRY $s5
    BRB_FIELD_LONG_OFFSET_ENTRY $s6
    BRB_FIELD_LONG_OFFSET_ENTRY $s7
    BRB_FIELD_LONG_OFFSET_ENTRY $s8

    # Entry points for offsets/indices fitting into int16_t.
    BRB_FIELD_SHORT_OFFSET_ENTRY $v0
    BRB_FIELD_SHORT_OFFSET_ENTRY $v1
    BRB_FIELD_SHORT_OFFSET_ENTRY $a0
    BRB_FIELD_SHORT_OFFSET_ENTRY $a1
    BRB_FIELD_SHORT_OFFSET_ENTRY $a2
    BRB_FIELD_SHORT_OFFSET_ENTRY $a3
    BRB_FIELD_SHORT_OFFSET_ENTRY $t0
    BRB_FIELD_SHORT_OFFSET_ENTRY $t1
    BRB_FIELD_SHORT_OFFSET_ENTRY $t2
    BRB_FIELD_SHORT_OFFSET_ENTRY $t3
    BRB_FIELD_SHORT_OFFSET_ENTRY $t4
    BRB_FIELD_SHORT_OFFSET_ENTRY $t5
    BRB_FIELD_SHORT_OFFSET_ENTRY $t6
    BRB_FIELD_SHORT_OFFSET_ENTRY $t7
    BRB_FIELD_SHORT_OFFSET_ENTRY $s2
    BRB_FIELD_SHORT_OFFSET_ENTRY $s3
    BRB_FIELD_SHORT_OFFSET_ENTRY $s4
    BRB_FIELD_SHORT_OFFSET_ENTRY $s5
    BRB_FIELD_SHORT_OFFSET_ENTRY $s6
    BRB_FIELD_SHORT_OFFSET_ENTRY $s7
    BRB_FIELD_SHORT_OFFSET_ENTRY $s8

    .global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
    # Entry points for GC roots.
    BRB_GC_ROOT_ENTRY $v0
    BRB_GC_ROOT_ENTRY $v1
    BRB_GC_ROOT_ENTRY $a0
    BRB_GC_ROOT_ENTRY $a1
    BRB_GC_ROOT_ENTRY $a2
    BRB_GC_ROOT_ENTRY $a3
    BRB_GC_ROOT_ENTRY $t0
    BRB_GC_ROOT_ENTRY $t1
    BRB_GC_ROOT_ENTRY $t2
    BRB_GC_ROOT_ENTRY $t3
    BRB_GC_ROOT_ENTRY $t4
    BRB_GC_ROOT_ENTRY $t5
    BRB_GC_ROOT_ENTRY $t6
    BRB_GC_ROOT_ENTRY $t7
    BRB_GC_ROOT_ENTRY $s2
    BRB_GC_ROOT_ENTRY $s3
    BRB_GC_ROOT_ENTRY $s4
    BRB_GC_ROOT_ENTRY $s5
    BRB_GC_ROOT_ENTRY $s6
    BRB_GC_ROOT_ENTRY $s7
    BRB_GC_ROOT_ENTRY $s8
    .global art_quick_read_barrier_mark_introspection_end_of_entries
art_quick_read_barrier_mark_introspection_end_of_entries:
    nop                         # Fill the delay slot of the last BRB_GC_ROOT_ENTRY.

.Lintrospection_throw_npe:
    b       art_quick_throw_null_pointer_exception
    addiu   $ra, $ra, 4         # Skip lw, make $ra point to lw's stack map.

    .set push
    .set noat

    // Fields and array elements.

.Lintrospection_field_array:
    // Get the field/element address using $t8 and the offset from the lw instruction.
    lh      $at, 0($ra)         # $ra points to lw: $at = field/element offset.
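    # (Little-endian MIPS: the first halfword of the lw encoding is its 16-bit
    # signed offset immediate, which lh sign-extends.)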
    addiu   $ra, $ra, 4 + HEAP_POISON_INSTR_SIZE  # Skip lw(+subu).
    addu    $t8, $t8, $at       # $t8 = field/element address.

    // Calculate the address of the exit point, store it in $gp and load the reference into $t8.
    lb      $at, (-HEAP_POISON_INSTR_SIZE - 2)($ra)   # $ra-HEAP_POISON_INSTR_SIZE-4 points to
                                                      # "lw `out`, ...".
    andi    $at, $at, 31        # Extract `out` from lw.
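    # (Byte 2 of the lw encoding holds bits 23:16; its low 5 bits are the rt
    # field, i.e. the number of the `out` register.)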
    sll     $at, $at, 3         # Multiply `out` by the exit point size:
                                # each BRB_FIELD_EXIT* is 2 instructions (8 bytes).

    lw      $t8, 0($t8)         # $t8 = reference.
    UNPOISON_HEAP_REF $t8

    // Return if null reference.
    bnez    $t8, .Lintrospection_common
    addu    $gp, $gp, $at       # $gp = address of the exit point.

    // Early return through the exit point.
.Lintrospection_return_early:
    jalr    $zero, $gp          # Move $t8 to `out` and return.
    nop

    // Code common for GC roots, fields and array elements.

.Lintrospection_common:
    // Check lock word for mark bit, if marked return.
    lw      $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET($t8)
    sll     $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT     # Move mark bit to sign bit.
    bltz    $at, .Lintrospection_return_early
#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
    // The below code depends on the lock word state being in the highest bits
    // and the "forwarding address" state having all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
#endif
    // Test that both of the forwarding address state bits are 1.
    sll     $at, $t9, 1
    and     $at, $at, $t9                               # Sign bit = 1 IFF both bits are 1.
    bgez    $at, .Lintrospection_mark
    nop

    .set pop

    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    jalr    $zero, $gp          # Move $t8 to `out` and return.
    sll     $t8, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT

.Lintrospection_mark:
    // Partially set up the stack frame preserving only $ra.
    addiu   $sp, $sp, -160      # Includes 16 bytes of space for argument registers $a0-$a3.
    .cfi_adjust_cfa_offset 160
    sw      $ra, 156($sp)
    .cfi_rel_offset 31, 156

    // Set up $gp, clobbering $ra and using the branch delay slot for a useful instruction.
    bal     1f
    sw      $gp, 152($sp)       # Preserve the exit point address.
1:
    .cpload $ra
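    # (.cpload materializes $gp from $ra, which holds the address of label 1,
    # so the `la $t9, artReadBarrierMark` below can be resolved through the
    # GOT under PIC.)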

    // Finalize the stack frame and call.
    sw      $t7, 148($sp)
    .cfi_rel_offset 15, 148
    sw      $t6, 144($sp)
    .cfi_rel_offset 14, 144
    sw      $t5, 140($sp)
    .cfi_rel_offset 13, 140
    sw      $t4, 136($sp)
    .cfi_rel_offset 12, 136
    sw      $t3, 132($sp)
    .cfi_rel_offset 11, 132
    sw      $t2, 128($sp)
    .cfi_rel_offset 10, 128
    sw      $t1, 124($sp)
    .cfi_rel_offset 9, 124
    sw      $t0, 120($sp)
    .cfi_rel_offset 8, 120
    sw      $a3, 116($sp)
    .cfi_rel_offset 7, 116
    sw      $a2, 112($sp)
    .cfi_rel_offset 6, 112
    sw      $a1, 108($sp)
    .cfi_rel_offset 5, 108
    sw      $a0, 104($sp)
    .cfi_rel_offset 4, 104
    sw      $v1, 100($sp)
    .cfi_rel_offset 3, 100
    sw      $v0, 96($sp)
    .cfi_rel_offset 2, 96

    la      $t9, artReadBarrierMark

    sdc1    $f18, 88($sp)
    sdc1    $f16, 80($sp)
    sdc1    $f14, 72($sp)
    sdc1    $f12, 64($sp)
    sdc1    $f10, 56($sp)
    sdc1    $f8,  48($sp)
    sdc1    $f6,  40($sp)
    sdc1    $f4,  32($sp)
    sdc1    $f2,  24($sp)
    sdc1    $f0,  16($sp)

    jalr    $t9                 # $v0 <- artReadBarrierMark(reference)
    move    $a0, $t8            # Pass reference in $a0.
    move    $t8, $v0

    lw      $ra, 156($sp)
    .cfi_restore 31
    lw      $gp, 152($sp)       # $gp = address of the exit point.
    lw      $t7, 148($sp)
    .cfi_restore 15
    lw      $t6, 144($sp)
    .cfi_restore 14
    lw      $t5, 140($sp)
    .cfi_restore 13
    lw      $t4, 136($sp)
    .cfi_restore 12
    lw      $t3, 132($sp)
    .cfi_restore 11
    lw      $t2, 128($sp)
    .cfi_restore 10
    lw      $t1, 124($sp)
    .cfi_restore 9
    lw      $t0, 120($sp)
    .cfi_restore 8
    lw      $a3, 116($sp)
    .cfi_restore 7
    lw      $a2, 112($sp)
    .cfi_restore 6
    lw      $a1, 108($sp)
    .cfi_restore 5
    lw      $a0, 104($sp)
    .cfi_restore 4
    lw      $v1, 100($sp)
    .cfi_restore 3
    lw      $v0, 96($sp)
    .cfi_restore 2

    ldc1    $f18, 88($sp)
    ldc1    $f16, 80($sp)
    ldc1    $f14, 72($sp)
    ldc1    $f12, 64($sp)
    ldc1    $f10, 56($sp)
    ldc1    $f8,  48($sp)
    ldc1    $f6,  40($sp)
    ldc1    $f4,  32($sp)
    ldc1    $f2,  24($sp)
    ldc1    $f0,  16($sp)

    // Return through the exit point.
    jalr    $zero, $gp          # Move $t8 to `out` and return.
    addiu   $sp, $sp, 160
    .cfi_adjust_cfa_offset -160

.Lintrospection_exits:
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT $v0
    BRB_FIELD_EXIT $v1
    BRB_FIELD_EXIT $a0
    BRB_FIELD_EXIT $a1
    BRB_FIELD_EXIT $a2
    BRB_FIELD_EXIT $a3
    BRB_FIELD_EXIT $t0
    BRB_FIELD_EXIT $t1
    BRB_FIELD_EXIT $t2
    BRB_FIELD_EXIT $t3
    BRB_FIELD_EXIT $t4
    BRB_FIELD_EXIT $t5
    BRB_FIELD_EXIT $t6
    BRB_FIELD_EXIT $t7
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT $s2
    BRB_FIELD_EXIT $s3
    BRB_FIELD_EXIT $s4
    BRB_FIELD_EXIT $s5
    BRB_FIELD_EXIT $s6
    BRB_FIELD_EXIT $s7
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT_BREAK
    BRB_FIELD_EXIT $s8
    BRB_FIELD_EXIT_BREAK
END art_quick_read_barrier_mark_introspection

    /*
     * Polymorphic method invocation.
     * On entry:
     *   a0 = unused
     *   a1 = receiver
     */
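    // The SAVE_REFS_AND_ARGS frame set up below captures all argument
    // registers; $a2 is pointed at that saved context so artInvokePolymorphic
    // can decode the actual arguments of the call site from it.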
.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    move    $a0, $a1                            # Make $a0 the receiver.
    move    $a1, rSELF                          # Make $a1 an alias for the current Thread.
    la      $t9, artInvokePolymorphic           # Invoke artInvokePolymorphic
    jalr    $t9                                 # with args (receiver, Thread*, context).
    addiu   $a2, $sp, ARG_SLOT_SIZE             # Make $a2 a pointer to the saved frame context.
    lw      $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    bnez    $t7, 1f
    # We don't care if $v0 and/or $v1 are modified when the exception branch is taken.
    MTD     $v0, $v1, $f0, $f1                  # Copy $v0/$v1 to $f0/$f1 in case
                                                # the result is float/double.
    jalr    $zero, $ra
    nop
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_invoke_polymorphic

    /*
     * InvokeCustom invocation.
     * On entry:
     *   a0 = call_site_idx
     */
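    // As with art_quick_invoke_polymorphic above, the saved frame context
    // passed in $a2 lets artInvokeCustom decode the call site arguments.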
.extern artInvokeCustom
ENTRY art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    move    $a1, rSELF                          # Make $a1 an alias for the current Thread.
    la      $t9, artInvokeCustom                # Invoke artInvokeCustom
    jalr    $t9                                 # with args (call_site_idx, Thread*, context).
    addiu   $a2, $sp, ARG_SLOT_SIZE             # Make $a2 a pointer to the saved frame context.
    lw      $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    bnez    $t7, 1f
    # We don't care if $v0 and/or $v1 are modified when the exception branch is taken.
    MTD     $v0, $v1, $f0, $f1                  # Copy $v0/$v1 to $f0/$f1 in case
                                                # the result is float/double.
    jalr    $zero, $ra
    nop
END art_quick_invoke_custom
